From 9b9f0efb6e6fd6a99bd11468e1d1b3b3d990f3cc Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Mon, 6 Jun 2022 22:36:43 +0100 Subject: [PATCH] draft non-synchronous NuBus (using sampling at sysclk to cut down on latency), minimalist support for 1/2/4 accel --- .../NuBusFPGAInit/NuBusFPGAInit.c | 443 ++++------------- .../NuBusFPGAInit/NuBusFPGA_QD.h | 303 ++++++++++++ nubus-to-ztex-gateware/blit.c | 40 +- nubus-to-ztex-gateware/nubus_full.py | 2 +- nubus-to-ztex-gateware/nubus_full_sampling.py | 445 ++++++++++++++++++ nubus-to-ztex-gateware/nubus_to_fpga_soc.py | 12 +- nubus-to-ztex-gateware/slave_tb.sv | 8 +- 7 files changed, 869 insertions(+), 384 deletions(-) create mode 100644 nubus-to-ztex-gateware/NuBusFPGAInit/NuBusFPGA_QD.h create mode 100644 nubus-to-ztex-gateware/nubus_full_sampling.py diff --git a/nubus-to-ztex-gateware/NuBusFPGAInit/NuBusFPGAInit.c b/nubus-to-ztex-gateware/NuBusFPGAInit/NuBusFPGAInit.c index cf3c69e..82be0e6 100644 --- a/nubus-to-ztex-gateware/NuBusFPGAInit/NuBusFPGAInit.c +++ b/nubus-to-ztex-gateware/NuBusFPGAInit/NuBusFPGAInit.c @@ -42,310 +42,13 @@ static inline unsigned long brev(const unsigned long r) { #define uint16_t unsigned short #define uint32_t unsigned long -// this is the stack frame upon entry in BitBlt as described in DravingVars.a -// wrong order as it's going in negative offset -struct qdstuff_order { - // STACK FRAME VARS USED BY SEEKMASK (CALLED BY STRETCHBITS, RGNBLT, DRAWARC, DRAWLINE) - // (NOT USED IN PATEXPAND) - - uint16_t RECTFLAG; // // EQU -2 ;WORD - uint16_t VERT; // // RECTFLAG-2 - uint32_t RGNBUFFER; // // VERT-4 - uint32_t RUNBUF; // // RGNBUFFER-4 - uint16_t BUFLEFT; // // RUNBUF-2 - uint16_t BUFSIZE; // // BUFLEFT-2 - uint32_t EXRTN; // // BUFSIZE-4 - uint32_t RUNRTN; // // EXRTN-4 - uint32_t SEEKMASK; // // RUNRTN-4 - uint32_t DSTMASKBUF; // // SEEKMASK-4 - uint32_t DSTMASKALIGN; // // DSTMASKBUF-4 - uint8_t STATEA[24]; // STATE RECORD // DSTMASKALIGN-RGNREC - uint8_t STATEB[24]; // STATE RECORD // 
STATEA-RGNREC - uint8_t STATEC[24]; // STATE RECORD // STATEB-RGNREC - uint16_t MINRECT[4]; // // STATEC-8 - uint16_t DSTSHIFT; // // MINRECT-2 - uint16_t RUNBUMP; // // DSTSHIFT-2 - uint32_t DSTROW; // // RUNBUMP-4 - uint32_t GoShow; // Go home and show crsr // DSTROW-4 - uint32_t STACKFREE; // -> // GoShow-4 - - // STACK FRAME VARS USED BY PATEXPAND, COLORMAP, DRAWSLAB - // (CALLED BY STRETCHBITS, RGNBLT, BITBLT, DRAWARC, DRAWLINE) - - // SET UP FOR BITBLT FOR RGNBLT - uint32_t EXPAT; // YES // STACKFREE-4 - uint16_t PATVMASK; // (must follow expat) // EXPAT-2 - uint16_t PATHMASK; // (must follow PATVMASK) // PATVMASK-2 - uint16_t PATROW; // (must follow PATHMASK) // PATHMASK-2 - uint16_t PATHPOS; // YES // PATROW-2 - uint8_t filler5; // <8> YES // PATHPOS-1 - uint8_t alphaMode; // <8> // filler5-1 - uint32_t PATVPOS; // <8> YES // alphaMode-4 - uint16_t LOCMODE; // YES // PATVPOS-2 - uint32_t LOCPAT; // YES // LOCMODE-4 - uint32_t FCOLOR; // YES // LOCPAT-4 - uint32_t BCOLOR; // YES // FCOLOR-4 - uint8_t useDither; // // BCOLOR-1 ;(was pixsrc) reclaimed 07Jul88 - uint8_t NEWPATTERN; // YES // useDither-1 - uint8_t DSTPIX[78]; // +COLOR TABLE YES -> STACKFREE -54-(50+8) // NEWPATTERN- - - uint16_t weight[3]; // weight for averaging // DSTPIX-6 //uint16_t pin[3]; // used by max, min // weight - uint16_t notWeight[3]; // complement of weight (for average) // weight-6 - uint8_t multiColor; // set if source contains nonblack/white colors // notWeight-1 - uint8_t MMUsave; // MMU mode on entry to QD // multiColor-1 - uint8_t FGnotBlack; // / true if forecolor - black // MMUsave-1 - uint8_t BGnotWhite; // \ true if backcolor - white (must follow FGBlack) // FGnotBlack-1 - uint32_t colorTable; // pointer to color table // BGnotWhite-4 - uint32_t invColor; // pointer to inverse color table // colorTable-4 - uint16_t invSize; // resolution of inverse color table // invColor-2 - uint16_t rtShift; // used by average how far to shift // invSize-2 - uint32_t transColor; // 
copy of backcolor for transparent // rtShift-4 - uint32_t hilitColor; // hilite color pixels-> DSTPIX-36 // transColor-4 - - // MORE SHARED STACK FRAME VARS (STRETCHBITS, RGNBLT, BITBLT) - - uint16_t alignSrcPM; // // hilitColor-2 - uint8_t SRCPIX[78]; // YES // alignSrcPM- - uint16_t alignMaskPM; // // SRCPIX-2 - uint8_t MASKPIX[78]; // YES // alignMaskPM- - uint32_t SRCROW; // YES // MASKPIX-4 - uint32_t MASKROW; // YES // SRCROW-4 - uint16_t SRCSHIFT; // YES // MASKROW-2 - uint16_t MASKSHIFT; // YES // SRCSHIFT-2 - uint32_t INVERTFLAG; // YES // MASKSHIFT-4 - uint32_t SAVESTK; // YES // INVERTFLAG-4 - uint32_t SAVEA5; // YES // SAVESTK-4 - - uint32_t SRCBUF; // // SAVEA5-4 - uint32_t DSTBUF; // // SRCBUF-4 - uint32_t SCALEBUF; // // DSTBUF-4 - uint32_t dstBufBump; // // SCALEBUF-4 - uint32_t scaleBufBump; // // dstBufBump-4 - uint32_t SRCMASKBUF; // // scaleBufBump-4 - uint16_t filler1; // // SRCMASKBUF-2 - uint16_t SRCLONGS; // // filler1-2 - uint16_t SRCMASKLONGS; // // SRCLONGS-2 - uint16_t DSTMASKLONGS; // // SRCMASKLONGS-2 - uint16_t DSTLONGS; // // DSTMASKLONGS-2 - uint16_t SCALELONGS; // // DSTLONGS-2 - uint32_t SRCADDR; // // SCALELONGS-4 - uint32_t MASKADDR; // // SRCADDR-4 - uint32_t DSTADDR; // // MASKADDR-4 - uint32_t SRCLIMIT; // // DSTADDR-4 - uint16_t NUMER[2]; // // SRCLIMIT-4 - uint16_t DENOM[2]; // // NUMER-4 - uint16_t MASKNUMER[2]; // // DENOM-4 - uint16_t MASKDENOM[2]; // // MASKNUMER-4 - uint32_t MODECASE; // -> hilitColor-140-2*(PMREC+CTREC) (50+8) -> -256 -> DSTPIX -292 // MASKDENOM-4 - - // STACK FRAME VARS USED BY STRETCHBITS ONLY - - uint32_t RATIOCASE; // // MODECASE-4 - uint32_t MASKCASE; // // RATIOCASE-4 - uint16_t HORIZFRACTION; // // MASKCASE-2 - uint16_t MASKFRACT; // // HORIZFRACTION-2 - uint32_t SCALECASE; // // MASKFRACT-4 - uint16_t SRCSCANS; // // SCALECASE-2 - uint16_t SRCPIXCNT; // // SRCSCANS-2 - uint32_t SRCALIGN; // // SRCPIXCNT-4 - uint32_t DSTALIGN; // // SRCALIGN-4 - uint32_t MASKALIGN; // // DSTALIGN-4 - uint32_t 
ScaleTbl; // // MASKALIGN-4 - uint16_t VERROR; // // ScaleTbl-2 - uint16_t CRSRFLAG; // // VERROR-2 - uint32_t REALBOUNDS; // -> MODECASE-44 -> DSTPIX-336 // CRSRFLAG-4 - - - // STACK FRAME VARS USED BY RGNBLT ONLY - - uint16_t FIRSTV; // // REALBOUNDS-2 - uint16_t LASTV; // // FIRSTV-2 - uint16_t VBUMP; // , MUST BE ABOVE HBUMP // LASTV-2 - uint16_t HBUMP; // // VBUMP-2 - uint32_t RGNADDR; // // HBUMP-4 - uint16_t filler2; // // RGNADDR-2 - uint16_t SRCSIZE; // // filler2-2 - uint32_t SAVESTK2; // -> REALBOUNDS-20 -> DSTPIX-356 // SRCSIZE-4 - - - // STACK FRAME VARS USED BY BITBLT ONLY - - uint16_t SRCV; // // SAVESTK2-2 - uint16_t DSTV; // // SRCV-2 - uint16_t SRCBUMP; // // DSTV-2 - uint16_t HEIGHT; // // SRCBUMP-2 - uint16_t SRCRECT2[4]; // -> SAVESTK2-16 -> DSTPIX-372 // HEIGHT-8 - uint32_t FIRSTMASK; // // SRCRECT2-4 - uint16_t LONGCNT; // // FIRSTMASK-2 - - - // STACK FRAME VARS USED BY RGNBLT/BITBLT - - uint8_t doneMid; // two flags used to control loop // LONGCNT-1 - uint8_t endSwitch; // three-way switch chooses from src, pat, bigpat // doneMid-1 - uint32_t lastMask; // mask for last long blitted on line // endSwitch-4 - uint16_t midCount; // # of pixels on line less mask longs - 1 // lastMask-2 - uint16_t pixInLong; // # of pixels in a long - 1 // midCount-2 - uint32_t patOffset; // pattern horizontal initial offset // pixInLong-4 - uint16_t patPos; // pattern vertical offset // patOffset-2 - uint16_t destPixCnt; // 1-based cnt of pixels to blit<02Mar89 BAL> // patPos-2 - uint32_t destPixOffset; // destination pixel offset <08Jan89 BAL> // destPixCnt-4 - uint16_t pixInLong1; // same as pixInLong, 1 based (for transparent) // destPixOffset-2 - uint16_t longBump; // 32 signed direction of blit (for transparent) // pixInLong1-2 -}; - -// same as above, but lines are in reverse order so it can be used directly once the pointer to the stack frame is known -// some types have been hand-converted (e.g. 
MINRECT to Rect) -struct qdstuff { - uint16_t longBump; // 32 signed direction of blit (for transparent) // pixInLong1-2 - uint16_t pixInLong1; // same as pixInLong, 1 based (for transparent) // destPixOffset-2 - uint32_t destPixOffset; // destination pixel offset <08Jan89 BAL> // destPixCnt-4 - uint16_t destPixCnt; // 1-based cnt of pixels to blit<02Mar89 BAL> // patPos-2 - uint16_t patPos; // pattern vertical offset // patOffset-2 - uint32_t patOffset; // pattern horizontal initial offset // pixInLong-4 - uint16_t pixInLong; // # of pixels in a long - 1 // midCount-2 - uint16_t midCount; // # of pixels on line less mask longs - 1 // lastMask-2 - uint32_t lastMask; // mask for last long blitted on line // endSwitch-4 - uint8_t endSwitch; // three-way switch chooses from src, pat, bigpat // doneMid-1 - uint8_t doneMid; // two flags used to control loop // LONGCNT-1 - - // STACK FRAME VARS USED BY RGNBLT/BITBLT - - - uint16_t LONGCNT; // // FIRSTMASK-2 - uint32_t FIRSTMASK; // // SRCRECT2-4 - uint16_t SRCRECT2[4]; // -> SAVESTK2-16 -> DSTPIX-372 // HEIGHT-8 - uint16_t HEIGHT; // // SRCBUMP-2 - uint16_t SRCBUMP; // // DSTV-2 - uint16_t DSTV; // // SRCV-2 - uint16_t SRCV; // // SAVESTK2-2 - - // STACK FRAME VARS USED BY BITBLT ONLY - - - uint32_t SAVESTK2; // -> REALBOUNDS-20 -> DSTPIX-356 // SRCSIZE-4 - uint16_t SRCSIZE; // // filler2-2 - uint16_t filler2; // // RGNADDR-2 - uint32_t RGNADDR; // // HBUMP-4 - uint16_t HBUMP; // // VBUMP-2 - uint16_t VBUMP; // , MUST BE ABOVE HBUMP // LASTV-2 - uint16_t LASTV; // // FIRSTV-2 - uint16_t FIRSTV; // // REALBOUNDS-2 - - // STACK FRAME VARS USED BY RGNBLT ONLY - - - uint32_t REALBOUNDS; // -> MODECASE-44 -> DSTPIX-336 // CRSRFLAG-4 - uint16_t CRSRFLAG; // // VERROR-2 - uint16_t VERROR; // // ScaleTbl-2 - uint32_t ScaleTbl; // // MASKALIGN-4 - uint32_t MASKALIGN; // // DSTALIGN-4 - uint32_t DSTALIGN; // // SRCALIGN-4 - uint32_t SRCALIGN; // // SRCPIXCNT-4 - uint16_t SRCPIXCNT; // // SRCSCANS-2 - uint16_t SRCSCANS; // // 
SCALECASE-2 - uint32_t SCALECASE; // // MASKFRACT-4 - uint16_t MASKFRACT; // // HORIZFRACTION-2 - uint16_t HORIZFRACTION; // // MASKCASE-2 - uint32_t MASKCASE; // // RATIOCASE-4 - uint32_t RATIOCASE; // // MODECASE-4 - - // STACK FRAME VARS USED BY STRETCHBITS ONLY - - uint32_t MODECASE; // -> hilitColor-140-2*(PMREC+CTREC) (50+8) -> -256 -> DSTPIX -292 // MASKDENOM-4 - uint16_t MASKDENOM[2]; // // MASKNUMER-4 - uint16_t MASKNUMER[2]; // // DENOM-4 - uint16_t DENOM[2]; // // NUMER-4 - uint16_t NUMER[2]; // // SRCLIMIT-4 - uint32_t SRCLIMIT; // // DSTADDR-4 - uint32_t DSTADDR; // // MASKADDR-4 - uint32_t MASKADDR; // // SRCADDR-4 - uint32_t SRCADDR; // // SCALELONGS-4 - uint16_t SCALELONGS; // // DSTLONGS-2 - uint16_t DSTLONGS; // // DSTMASKLONGS-2 - uint16_t DSTMASKLONGS; // // SRCMASKLONGS-2 - uint16_t SRCMASKLONGS; // // SRCLONGS-2 - uint16_t SRCLONGS; // // filler1-2 - uint16_t filler1; // // SRCMASKBUF-2 - uint32_t SRCMASKBUF; // // scaleBufBump-4 - uint32_t scaleBufBump; // // dstBufBump-4 - uint32_t dstBufBump; // // SCALEBUF-4 - uint32_t SCALEBUF; // // DSTBUF-4 - uint32_t DSTBUF; // // SRCBUF-4 - uint32_t SRCBUF; // // SAVEA5-4 - - uint32_t SAVEA5; // YES // SAVESTK-4 - uint32_t SAVESTK; // YES // INVERTFLAG-4 - uint32_t INVERTFLAG; // YES // MASKSHIFT-4 - uint16_t MASKSHIFT; // YES // SRCSHIFT-2 - uint16_t SRCSHIFT; // YES // MASKROW-2 - uint32_t MASKROW; // YES // SRCROW-4 - uint32_t SRCROW; // YES // MASKPIX-4 - uint8_t MASKPIX[78]; // YES // alignMaskPM- - uint16_t alignMaskPM; // // SRCPIX-2 - uint8_t SRCPIX[78]; // YES // alignSrcPM- - uint16_t alignSrcPM; // // hilitColor-2 - - // MORE SHARED STACK FRAME VARS (STRETCHBITS, RGNBLT, BITBLT) - - uint32_t hilitColor; // hilite color pixels-> DSTPIX-36 // transColor-4 - uint32_t transColor; // copy of backcolor for transparent // rtShift-4 - uint16_t rtShift; // used by average how far to shift // invSize-2 - uint16_t invSize; // resolution of inverse color table // invColor-2 - uint32_t invColor; // 
pointer to inverse color table // colorTable-4 - uint32_t colorTable; // pointer to color table // BGnotWhite-4 - uint8_t BGnotWhite; // \ true if backcolor - white (must follow FGBlack) // FGnotBlack-1 - uint8_t FGnotBlack; // / true if forecolor - black // MMUsave-1 - uint8_t MMUsave; // MMU mode on entry to QD // multiColor-1 - uint8_t multiColor; // set if source contains nonblack/white colors // notWeight-1 - uint16_t notWeight[3]; // complement of weight (for average) // weight-6 - uint16_t weight[3]; // weight for averaging // DSTPIX-6 //uint16_t pin[3]; // used by max, min // weight - - uint8_t DSTPIX[78]; // +COLOR TABLE YES -> STACKFREE -54-(50+8) // NEWPATTERN- - uint8_t NEWPATTERN; // YES // useDither-1 - uint8_t useDither; // // BCOLOR-1 ;(was pixsrc) reclaimed 07Jul88 - uint32_t BCOLOR; // YES // FCOLOR-4 - uint32_t FCOLOR; // YES // LOCPAT-4 - uint32_t LOCPAT; // YES // LOCMODE-4 - uint16_t LOCMODE; // YES // PATVPOS-2 - uint32_t PATVPOS; // <8> YES // alphaMode-4 - uint8_t alphaMode; // <8> // filler5-1 - uint8_t filler5; // <8> YES // PATHPOS-1 - uint16_t PATHPOS; // YES // PATROW-2 - uint16_t PATROW; // (must follow PATHMASK) // PATHMASK-2 - uint16_t PATHMASK; // (must follow PATVMASK) // PATVMASK-2 - uint16_t PATVMASK; // (must follow expat) // EXPAT-2 - uint32_t* EXPAT; // YES // STACKFREE-4 - // SET UP FOR BITBLT FOR RGNBLT - - // (CALLED BY STRETCHBITS, RGNBLT, BITBLT, DRAWARC, DRAWLINE) - // STACK FRAME VARS USED BY PATEXPAND, COLORMAP, DRAWSLAB - - uint32_t STACKFREE; // -> // GoShow-4 - uint32_t GoShow; // Go home and show crsr // DSTROW-4 - uint32_t DSTROW; // // RUNBUMP-4 - uint16_t RUNBUMP; // // DSTSHIFT-2 - uint16_t DSTSHIFT; // // MINRECT-2 - Rect MINRECT; // // STATEC-8 - uint8_t STATEC[24]; // STATE RECORD // STATEB-RGNREC - uint8_t STATEB[24]; // STATE RECORD // STATEA-RGNREC - uint8_t STATEA[24]; // STATE RECORD // DSTMASKALIGN-RGNREC - uint32_t DSTMASKALIGN; // // DSTMASKBUF-4 - uint32_t DSTMASKBUF; // // SEEKMASK-4 - uint32_t 
SEEKMASK; // // RUNRTN-4 - uint32_t RUNRTN; // // EXRTN-4 - uint32_t EXRTN; // // BUFSIZE-4 - uint16_t BUFSIZE; // // BUFLEFT-2 - uint16_t BUFLEFT; // // RUNBUF-2 - uint32_t RUNBUF; // // RGNBUFFER-4 - uint32_t RGNBUFFER; // // VERT-4 - uint16_t VERT; // // RECTFLAG-2 - uint16_t RECTFLAG; // // EQU -2 ;WORD - - // (NOT USED IN PATEXPAND) - // STACK FRAME VARS USED BY SEEKMASK (CALLED BY STRETCHBITS, RGNBLT, DRAWARC, DRAWLINE) -}; +#include "NuBusFPGA_QD.h" +#ifdef QEMU +#define DLOG(x) bt->debug = (x); /* QEMU builds: store x into bt->debug; expands with its own ';' because call sites omit it */ +#else +#define DLOG(X) +#endif int hwblit(char* stack, char* p_fb_base, /* short dstshift, */ short mode, Pattern* pat, PixMapPtr dstpix, PixMapPtr srcpix, Rect *dstrect, Rect *srcrect) { struct goblin_bt_regs* bt = (struct goblin_bt_regs*)(p_fb_base + GOBLIN_BT_OFFSET); @@ -357,19 +60,19 @@ int hwblit(char* stack, char* p_fb_base, /* short dstshift, */ short mode, Patte if ((mode != 0) && (mode != 8)) { // only copy handled for now #ifdef QEMU - bt->debug = -2L; - bt->debug = mode; + DLOG(-2L) + DLOG(mode) if (mode == 8) { - bt->debug = qdstack->PATROW; + DLOG(qdstack->PATROW) #if 0 - bt->debug = pat->pat[0]; - bt->debug = pat->pat[1]; - bt->debug = pat->pat[2]; - bt->debug = pat->pat[3]; - bt->debug = pat->pat[4]; - bt->debug = pat->pat[5]; - bt->debug = pat->pat[6]; - bt->debug = pat->pat[7]; + DLOG(pat->pat[0]) + DLOG(pat->pat[1]) + DLOG(pat->pat[2]) + DLOG(pat->pat[3]) + DLOG(pat->pat[4]) + DLOG(pat->pat[5]) + DLOG(pat->pat[6]) + DLOG(pat->pat[7]) #endif } #endif @@ -380,7 +83,7 @@ int hwblit(char* stack, char* p_fb_base, /* short dstshift, */ short mode, Patte register int i; register unsigned long expat0 = qdstack->EXPAT[0]; if (qdstack->PATROW != 0) { - bt->debug = -6L; + DLOG(-6L) return 0; } if ((expat0 & 0xFFFF) != ((expat0 >> 16) & 0xFFFF)) @@ -389,18 +92,19 @@ int hwblit(char* stack, char* p_fb_base, /* short dstshift, */ short mode, Patte return 0; for (i = 1 ; i < 16 ; i++) if (expat0 != qdstack->EXPAT[i]) { - bt->debug = -7L; - bt->debug = i; - 
bt->debug = expat0; - bt->debug = qdstack->EXPAT[i]; + DLOG(-7L) + DLOG(i) + DLOG(expat0) + DLOG(qdstack->EXPAT[i]) return 0; } } - + +#if 0 if (dstshift < 3) { // only 8/16/32 bits for now #ifdef QEMU - bt->debug = -3L; - bt->debug = dstshift; + DLOG(-3L) + DLOG(dstshift) #endif return 0; } @@ -408,17 +112,18 @@ int hwblit(char* stack, char* p_fb_base, /* short dstshift, */ short mode, Patte if (srcshift < 3) { // only 8/16/32 bits for now #ifdef QEMU - bt->debug = -8L; - bt->debug = srcshift; + DLOG(-8L) + DLOG(srcshift) #endif return 0; } srcshift -= 3; +#endif if (srcshift != dstshift) { - bt->debug = -9L; - bt->debug = srcshift; - bt->debug = dstshift; + DLOG(-9L) + DLOG(srcshift) + DLOG(dstshift) return 0; } @@ -429,18 +134,18 @@ int hwblit(char* stack, char* p_fb_base, /* short dstshift, */ short mode, Patte if (dstpix->baseAddr != p_fb_base) { // we're not destination #ifdef QEMU - bt->debug = -4L; - bt->debug = (unsigned long)dstpix->baseAddr; + DLOG(-4L) + DLOG((unsigned long)dstpix->baseAddr) #endif return 0; } if ((srcpix->baseAddr != p_fb_base) - // && ((unsigned long)srcpix->baseAddr >= 0x40000000) // and neither is main memory + // && ((unsigned long)srcpix->baseAddr >= 0x40000000) // and neither is main memory ){ #ifdef QEMU - bt->debug = -5L; - bt->debug = (unsigned long)srcpix->baseAddr; + DLOG(-5L) + DLOG((unsigned long)srcpix->baseAddr) #endif return 0; } @@ -448,6 +153,8 @@ int hwblit(char* stack, char* p_fb_base, /* short dstshift, */ short mode, Patte { Rect realrect, srcv, dstv; short width = qdstack->MINRECT.right - qdstack->MINRECT.left; + short src_check = 0x07 >> srcshift; + short dst_check = 0x07 >> dstshift; //*debug_ptr = -1L; @@ -466,6 +173,14 @@ int hwblit(char* stack, char* p_fb_base, /* short dstshift, */ short mode, Patte dstv.top = qdstack->MINRECT.top - dstpix->bounds.top; dstv.left = qdstack->MINRECT.left - dstpix->bounds.left; + // must be byte-aligned for now + if (width & src_check) + return 0; + if (srcv.left & src_check) 
+ return 0; + if (dstv.left & dst_check) + return 0; + /* if .baseAddr of both pix are different, no overlap */ /* // the HW can handle that for us @@ -476,41 +191,41 @@ int hwblit(char* stack, char* p_fb_base, /* short dstshift, */ short mode, Patte #ifdef QEMU #if 0 if ((mode == 8) && (qdstack->PATROW == 0)) { - bt->debug = 0x87654321; - bt->debug = qdstack->EXPAT[ 0]; - bt->debug = qdstack->EXPAT[ 1]; - bt->debug = qdstack->EXPAT[ 2]; - bt->debug = qdstack->EXPAT[ 3]; - bt->debug = qdstack->EXPAT[ 4]; - bt->debug = qdstack->EXPAT[ 5]; - bt->debug = qdstack->EXPAT[ 6]; - bt->debug = qdstack->EXPAT[ 7]; - bt->debug = qdstack->EXPAT[ 8]; - bt->debug = qdstack->EXPAT[ 9]; - bt->debug = qdstack->EXPAT[10]; - bt->debug = qdstack->EXPAT[11]; - bt->debug = qdstack->EXPAT[12]; - bt->debug = qdstack->EXPAT[13]; - bt->debug = qdstack->EXPAT[14]; - bt->debug = qdstack->EXPAT[15]; + DLOG(0x87654321) + DLOG(qdstack->EXPAT[ 0]) + DLOG(qdstack->EXPAT[ 1]) + DLOG(qdstack->EXPAT[ 2]) + DLOG(qdstack->EXPAT[ 3]) + DLOG(qdstack->EXPAT[ 4]) + DLOG(qdstack->EXPAT[ 5]) + DLOG(qdstack->EXPAT[ 6]) + DLOG(qdstack->EXPAT[ 7]) + DLOG(qdstack->EXPAT[ 8]) + DLOG(qdstack->EXPAT[ 9]) + DLOG(qdstack->EXPAT[10]) + DLOG(qdstack->EXPAT[11]) + DLOG(qdstack->EXPAT[12]) + DLOG(qdstack->EXPAT[13]) + DLOG(qdstack->EXPAT[14]) + DLOG(qdstack->EXPAT[15]) } #endif - bt->debug = -1L; + DLOG(-1L) - bt->debug = srcpix->rowBytes; - bt->debug = dstpix->rowBytes; + DLOG(srcpix->rowBytes) + DLOG(dstpix->rowBytes) - bt->debug = srcv.top; - bt->debug = srcv.left; + DLOG(srcv.top) + DLOG(srcv.left) - bt->debug = height; - bt->debug = width; + DLOG(height) + DLOG(width) - bt->debug = dstv.top; - bt->debug = dstv.left; + DLOG(dstv.top) + DLOG(dstv.left) - bt->debug = (long)dstpix->baseAddr; - bt->debug = (long)srcpix->baseAddr; + DLOG((long)dstpix->baseAddr) + DLOG((long)srcpix->baseAddr) return 0; #else @@ -617,10 +332,10 @@ void main(void) accel_base = ((char*)fb_base + GOBLIN_ACCEL_OFFSET); bt = (struct 
goblin_bt_regs*)bt_base; - bt->debug = 0xDEADBEEF; - bt->debug = (unsigned long)fb_base; - bt->debug = (unsigned long)bt_base; - bt->debug = (unsigned long)accel_base; + DLOG(0xDEADBEEF) + DLOG((unsigned long)fb_base) + DLOG((unsigned long)bt_base) + DLOG((unsigned long)accel_base) h = Get1Resource('INIT', kINITid); if (h) { diff --git a/nubus-to-ztex-gateware/NuBusFPGAInit/NuBusFPGA_QD.h b/nubus-to-ztex-gateware/NuBusFPGAInit/NuBusFPGA_QD.h new file mode 100644 index 0000000..113272f --- /dev/null +++ b/nubus-to-ztex-gateware/NuBusFPGAInit/NuBusFPGA_QD.h @@ -0,0 +1,303 @@ +// this is the stack frame upon entry in BitBlt as described in DrawingVars.a +// wrong order as it's going in negative offset +struct qdstuff_order { + // STACK FRAME VARS USED BY SEEKMASK (CALLED BY STRETCHBITS, RGNBLT, DRAWARC, DRAWLINE) + // (NOT USED IN PATEXPAND) + + uint16_t RECTFLAG; // // EQU -2 ;WORD + uint16_t VERT; // // RECTFLAG-2 + uint32_t RGNBUFFER; // // VERT-4 + uint32_t RUNBUF; // // RGNBUFFER-4 + uint16_t BUFLEFT; // // RUNBUF-2 + uint16_t BUFSIZE; // // BUFLEFT-2 + uint32_t EXRTN; // // BUFSIZE-4 + uint32_t RUNRTN; // // EXRTN-4 + uint32_t SEEKMASK; // // RUNRTN-4 + uint32_t DSTMASKBUF; // // SEEKMASK-4 + uint32_t DSTMASKALIGN; // // DSTMASKBUF-4 + uint8_t STATEA[24]; // STATE RECORD // DSTMASKALIGN-RGNREC + uint8_t STATEB[24]; // STATE RECORD // STATEA-RGNREC + uint8_t STATEC[24]; // STATE RECORD // STATEB-RGNREC + uint16_t MINRECT[4]; // // STATEC-8 + uint16_t DSTSHIFT; // // MINRECT-2 + uint16_t RUNBUMP; // // DSTSHIFT-2 + uint32_t DSTROW; // // RUNBUMP-4 + uint32_t GoShow; // Go home and show crsr // DSTROW-4 + uint32_t STACKFREE; // -> // GoShow-4 + + // STACK FRAME VARS USED BY PATEXPAND, COLORMAP, DRAWSLAB + // (CALLED BY STRETCHBITS, RGNBLT, BITBLT, DRAWARC, DRAWLINE) + + // SET UP FOR BITBLT FOR RGNBLT + uint32_t EXPAT; // YES // STACKFREE-4 + uint16_t PATVMASK; // (must follow expat) // EXPAT-2 + uint16_t PATHMASK; // (must follow PATVMASK) // PATVMASK-2 + uint16_t 
PATROW; // (must follow PATHMASK) // PATHMASK-2 + uint16_t PATHPOS; // YES // PATROW-2 + uint8_t filler5; // <8> YES // PATHPOS-1 + uint8_t alphaMode; // <8> // filler5-1 + uint32_t PATVPOS; // <8> YES // alphaMode-4 + uint16_t LOCMODE; // YES // PATVPOS-2 + uint32_t LOCPAT; // YES // LOCMODE-4 + uint32_t FCOLOR; // YES // LOCPAT-4 + uint32_t BCOLOR; // YES // FCOLOR-4 + uint8_t useDither; // // BCOLOR-1 ;(was pixsrc) reclaimed 07Jul88 + uint8_t NEWPATTERN; // YES // useDither-1 + uint8_t DSTPIX[78]; // +COLOR TABLE YES -> STACKFREE -54-(50+8) // NEWPATTERN- + + uint16_t weight[3]; // weight for averaging // DSTPIX-6 //uint16_t pin[3]; // used by max, min // weight + uint16_t notWeight[3]; // complement of weight (for average) // weight-6 + uint8_t multiColor; // set if source contains nonblack/white colors // notWeight-1 + uint8_t MMUsave; // MMU mode on entry to QD // multiColor-1 + uint8_t FGnotBlack; // / true if forecolor - black // MMUsave-1 + uint8_t BGnotWhite; // \ true if backcolor - white (must follow FGBlack) // FGnotBlack-1 + uint32_t colorTable; // pointer to color table // BGnotWhite-4 + uint32_t invColor; // pointer to inverse color table // colorTable-4 + uint16_t invSize; // resolution of inverse color table // invColor-2 + uint16_t rtShift; // used by average how far to shift // invSize-2 + uint32_t transColor; // copy of backcolor for transparent // rtShift-4 + uint32_t hilitColor; // hilite color pixels-> DSTPIX-36 // transColor-4 + + // MORE SHARED STACK FRAME VARS (STRETCHBITS, RGNBLT, BITBLT) + + uint16_t alignSrcPM; // // hilitColor-2 + uint8_t SRCPIX[78]; // YES // alignSrcPM- + uint16_t alignMaskPM; // // SRCPIX-2 + uint8_t MASKPIX[78]; // YES // alignMaskPM- + uint32_t SRCROW; // YES // MASKPIX-4 + uint32_t MASKROW; // YES // SRCROW-4 + uint16_t SRCSHIFT; // YES // MASKROW-2 + uint16_t MASKSHIFT; // YES // SRCSHIFT-2 + uint32_t INVERTFLAG; // YES // MASKSHIFT-4 + uint32_t SAVESTK; // YES // INVERTFLAG-4 + uint32_t SAVEA5; // YES // 
SAVESTK-4 + + uint32_t SRCBUF; // // SAVEA5-4 + uint32_t DSTBUF; // // SRCBUF-4 + uint32_t SCALEBUF; // // DSTBUF-4 + uint32_t dstBufBump; // // SCALEBUF-4 + uint32_t scaleBufBump; // // dstBufBump-4 + uint32_t SRCMASKBUF; // // scaleBufBump-4 + uint16_t filler1; // // SRCMASKBUF-2 + uint16_t SRCLONGS; // // filler1-2 + uint16_t SRCMASKLONGS; // // SRCLONGS-2 + uint16_t DSTMASKLONGS; // // SRCMASKLONGS-2 + uint16_t DSTLONGS; // // DSTMASKLONGS-2 + uint16_t SCALELONGS; // // DSTLONGS-2 + uint32_t SRCADDR; // // SCALELONGS-4 + uint32_t MASKADDR; // // SRCADDR-4 + uint32_t DSTADDR; // // MASKADDR-4 + uint32_t SRCLIMIT; // // DSTADDR-4 + uint16_t NUMER[2]; // // SRCLIMIT-4 + uint16_t DENOM[2]; // // NUMER-4 + uint16_t MASKNUMER[2]; // // DENOM-4 + uint16_t MASKDENOM[2]; // // MASKNUMER-4 + uint32_t MODECASE; // -> hilitColor-140-2*(PMREC+CTREC) (50+8) -> -256 -> DSTPIX -292 // MASKDENOM-4 + + // STACK FRAME VARS USED BY STRETCHBITS ONLY + + uint32_t RATIOCASE; // // MODECASE-4 + uint32_t MASKCASE; // // RATIOCASE-4 + uint16_t HORIZFRACTION; // // MASKCASE-2 + uint16_t MASKFRACT; // // HORIZFRACTION-2 + uint32_t SCALECASE; // // MASKFRACT-4 + uint16_t SRCSCANS; // // SCALECASE-2 + uint16_t SRCPIXCNT; // // SRCSCANS-2 + uint32_t SRCALIGN; // // SRCPIXCNT-4 + uint32_t DSTALIGN; // // SRCALIGN-4 + uint32_t MASKALIGN; // // DSTALIGN-4 + uint32_t ScaleTbl; // // MASKALIGN-4 + uint16_t VERROR; // // ScaleTbl-2 + uint16_t CRSRFLAG; // // VERROR-2 + uint32_t REALBOUNDS; // -> MODECASE-44 -> DSTPIX-336 // CRSRFLAG-4 + + + // STACK FRAME VARS USED BY RGNBLT ONLY + + uint16_t FIRSTV; // // REALBOUNDS-2 + uint16_t LASTV; // // FIRSTV-2 + uint16_t VBUMP; // , MUST BE ABOVE HBUMP // LASTV-2 + uint16_t HBUMP; // // VBUMP-2 + uint32_t RGNADDR; // // HBUMP-4 + uint16_t filler2; // // RGNADDR-2 + uint16_t SRCSIZE; // // filler2-2 + uint32_t SAVESTK2; // -> REALBOUNDS-20 -> DSTPIX-356 // SRCSIZE-4 + + + // STACK FRAME VARS USED BY BITBLT ONLY + + uint16_t SRCV; // // SAVESTK2-2 + uint16_t 
DSTV; // // SRCV-2 + uint16_t SRCBUMP; // // DSTV-2 + uint16_t HEIGHT; // // SRCBUMP-2 + uint16_t SRCRECT2[4]; // -> SAVESTK2-16 -> DSTPIX-372 // HEIGHT-8 + uint32_t FIRSTMASK; // // SRCRECT2-4 + uint16_t LONGCNT; // // FIRSTMASK-2 + + + // STACK FRAME VARS USED BY RGNBLT/BITBLT + + uint8_t doneMid; // two flags used to control loop // LONGCNT-1 + uint8_t endSwitch; // three-way switch chooses from src, pat, bigpat // doneMid-1 + uint32_t lastMask; // mask for last long blitted on line // endSwitch-4 + uint16_t midCount; // # of pixels on line less mask longs - 1 // lastMask-2 + uint16_t pixInLong; // # of pixels in a long - 1 // midCount-2 + uint32_t patOffset; // pattern horizontal initial offset // pixInLong-4 + uint16_t patPos; // pattern vertical offset // patOffset-2 + uint16_t destPixCnt; // 1-based cnt of pixels to blit<02Mar89 BAL> // patPos-2 + uint32_t destPixOffset; // destination pixel offset <08Jan89 BAL> // destPixCnt-4 + uint16_t pixInLong1; // same as pixInLong, 1 based (for transparent) // destPixOffset-2 + uint16_t longBump; // 32 signed direction of blit (for transparent) // pixInLong1-2 +}; + +// same as above, but lines are in reverse order so it can be used directly once the pointer to the stack frame is known +// some types have been hand-converted (e.g. 
MINRECT to Rect) +struct qdstuff { + uint16_t longBump; // 32 signed direction of blit (for transparent) // pixInLong1-2 + uint16_t pixInLong1; // same as pixInLong, 1 based (for transparent) // destPixOffset-2 + uint32_t destPixOffset; // destination pixel offset <08Jan89 BAL> // destPixCnt-4 + uint16_t destPixCnt; // 1-based cnt of pixels to blit<02Mar89 BAL> // patPos-2 + uint16_t patPos; // pattern vertical offset // patOffset-2 + uint32_t patOffset; // pattern horizontal initial offset // pixInLong-4 + uint16_t pixInLong; // # of pixels in a long - 1 // midCount-2 + uint16_t midCount; // # of pixels on line less mask longs - 1 // lastMask-2 + uint32_t lastMask; // mask for last long blitted on line // endSwitch-4 + uint8_t endSwitch; // three-way switch chooses from src, pat, bigpat // doneMid-1 + uint8_t doneMid; // two flags used to control loop // LONGCNT-1 + + // STACK FRAME VARS USED BY RGNBLT/BITBLT + + + uint16_t LONGCNT; // // FIRSTMASK-2 + uint32_t FIRSTMASK; // // SRCRECT2-4 + uint16_t SRCRECT2[4]; // -> SAVESTK2-16 -> DSTPIX-372 // HEIGHT-8 + uint16_t HEIGHT; // // SRCBUMP-2 + uint16_t SRCBUMP; // // DSTV-2 + uint16_t DSTV; // // SRCV-2 + uint16_t SRCV; // // SAVESTK2-2 + + // STACK FRAME VARS USED BY BITBLT ONLY + + + uint32_t SAVESTK2; // -> REALBOUNDS-20 -> DSTPIX-356 // SRCSIZE-4 + uint16_t SRCSIZE; // // filler2-2 + uint16_t filler2; // // RGNADDR-2 + uint32_t RGNADDR; // // HBUMP-4 + uint16_t HBUMP; // // VBUMP-2 + uint16_t VBUMP; // , MUST BE ABOVE HBUMP // LASTV-2 + uint16_t LASTV; // // FIRSTV-2 + uint16_t FIRSTV; // // REALBOUNDS-2 + + // STACK FRAME VARS USED BY RGNBLT ONLY + + + uint32_t REALBOUNDS; // -> MODECASE-44 -> DSTPIX-336 // CRSRFLAG-4 + uint16_t CRSRFLAG; // // VERROR-2 + uint16_t VERROR; // // ScaleTbl-2 + uint32_t ScaleTbl; // // MASKALIGN-4 + uint32_t MASKALIGN; // // DSTALIGN-4 + uint32_t DSTALIGN; // // SRCALIGN-4 + uint32_t SRCALIGN; // // SRCPIXCNT-4 + uint16_t SRCPIXCNT; // // SRCSCANS-2 + uint16_t SRCSCANS; // // 
SCALECASE-2 + uint32_t SCALECASE; // // MASKFRACT-4 + uint16_t MASKFRACT; // // HORIZFRACTION-2 + uint16_t HORIZFRACTION; // // MASKCASE-2 + uint32_t MASKCASE; // // RATIOCASE-4 + uint32_t RATIOCASE; // // MODECASE-4 + + // STACK FRAME VARS USED BY STRETCHBITS ONLY + + uint32_t MODECASE; // -> hilitColor-140-2*(PMREC+CTREC) (50+8) -> -256 -> DSTPIX -292 // MASKDENOM-4 + uint16_t MASKDENOM[2]; // // MASKNUMER-4 + uint16_t MASKNUMER[2]; // // DENOM-4 + uint16_t DENOM[2]; // // NUMER-4 + uint16_t NUMER[2]; // // SRCLIMIT-4 + uint32_t SRCLIMIT; // // DSTADDR-4 + uint32_t DSTADDR; // // MASKADDR-4 + uint32_t MASKADDR; // // SRCADDR-4 + uint32_t SRCADDR; // // SCALELONGS-4 + uint16_t SCALELONGS; // // DSTLONGS-2 + uint16_t DSTLONGS; // // DSTMASKLONGS-2 + uint16_t DSTMASKLONGS; // // SRCMASKLONGS-2 + uint16_t SRCMASKLONGS; // // SRCLONGS-2 + uint16_t SRCLONGS; // // filler1-2 + uint16_t filler1; // // SRCMASKBUF-2 + uint32_t SRCMASKBUF; // // scaleBufBump-4 + uint32_t scaleBufBump; // // dstBufBump-4 + uint32_t dstBufBump; // // SCALEBUF-4 + uint32_t SCALEBUF; // // DSTBUF-4 + uint32_t DSTBUF; // // SRCBUF-4 + uint32_t SRCBUF; // // SAVEA5-4 + + uint32_t SAVEA5; // YES // SAVESTK-4 + uint32_t SAVESTK; // YES // INVERTFLAG-4 + uint32_t INVERTFLAG; // YES // MASKSHIFT-4 + uint16_t MASKSHIFT; // YES // SRCSHIFT-2 + uint16_t SRCSHIFT; // YES // MASKROW-2 + uint32_t MASKROW; // YES // SRCROW-4 + uint32_t SRCROW; // YES // MASKPIX-4 + uint8_t MASKPIX[78]; // YES // alignMaskPM- + uint16_t alignMaskPM; // // SRCPIX-2 + uint8_t SRCPIX[78]; // YES // alignSrcPM- + uint16_t alignSrcPM; // // hilitColor-2 + + // MORE SHARED STACK FRAME VARS (STRETCHBITS, RGNBLT, BITBLT) + + uint32_t hilitColor; // hilite color pixels-> DSTPIX-36 // transColor-4 + uint32_t transColor; // copy of backcolor for transparent // rtShift-4 + uint16_t rtShift; // used by average how far to shift // invSize-2 + uint16_t invSize; // resolution of inverse color table // invColor-2 + uint32_t invColor; // 
pointer to inverse color table // colorTable-4 + uint32_t colorTable; // pointer to color table // BGnotWhite-4 + uint8_t BGnotWhite; // \ true if backcolor - white (must follow FGBlack) // FGnotBlack-1 + uint8_t FGnotBlack; // / true if forecolor - black // MMUsave-1 + uint8_t MMUsave; // MMU mode on entry to QD // multiColor-1 + uint8_t multiColor; // set if source contains nonblack/white colors // notWeight-1 + uint16_t notWeight[3]; // complement of weight (for average) // weight-6 + uint16_t weight[3]; // weight for averaging // DSTPIX-6 //uint16_t pin[3]; // used by max, min // weight + + uint8_t DSTPIX[78]; // +COLOR TABLE YES -> STACKFREE -54-(50+8) // NEWPATTERN- + uint8_t NEWPATTERN; // YES // useDither-1 + uint8_t useDither; // // BCOLOR-1 ;(was pixsrc) reclaimed 07Jul88 + uint32_t BCOLOR; // YES // FCOLOR-4 + uint32_t FCOLOR; // YES // LOCPAT-4 + uint32_t LOCPAT; // YES // LOCMODE-4 + uint16_t LOCMODE; // YES // PATVPOS-2 + uint32_t PATVPOS; // <8> YES // alphaMode-4 + uint8_t alphaMode; // <8> // filler5-1 + uint8_t filler5; // <8> YES // PATHPOS-1 + uint16_t PATHPOS; // YES // PATROW-2 + uint16_t PATROW; // (must follow PATHMASK) // PATHMASK-2 + uint16_t PATHMASK; // (must follow PATVMASK) // PATVMASK-2 + uint16_t PATVMASK; // (must follow expat) // EXPAT-2 + uint32_t* EXPAT; // YES // STACKFREE-4 + // SET UP FOR BITBLT FOR RGNBLT + + // (CALLED BY STRETCHBITS, RGNBLT, BITBLT, DRAWARC, DRAWLINE) + // STACK FRAME VARS USED BY PATEXPAND, COLORMAP, DRAWSLAB + + uint32_t STACKFREE; // -> // GoShow-4 + uint32_t GoShow; // Go home and show crsr // DSTROW-4 + uint32_t DSTROW; // // RUNBUMP-4 + uint16_t RUNBUMP; // // DSTSHIFT-2 + uint16_t DSTSHIFT; // // MINRECT-2 + Rect MINRECT; // // STATEC-8 + uint8_t STATEC[24]; // STATE RECORD // STATEB-RGNREC + uint8_t STATEB[24]; // STATE RECORD // STATEA-RGNREC + uint8_t STATEA[24]; // STATE RECORD // DSTMASKALIGN-RGNREC + uint32_t DSTMASKALIGN; // // DSTMASKBUF-4 + uint32_t DSTMASKBUF; // // SEEKMASK-4 + uint32_t 
SEEKMASK; // // RUNRTN-4 + uint32_t RUNRTN; // // EXRTN-4 + uint32_t EXRTN; // // BUFSIZE-4 + uint16_t BUFSIZE; // // BUFLEFT-2 + uint16_t BUFLEFT; // // RUNBUF-2 + uint32_t RUNBUF; // // RGNBUFFER-4 + uint32_t RGNBUFFER; // // VERT-4 + uint16_t VERT; // // RECTFLAG-2 + uint16_t RECTFLAG; // // EQU -2 ;WORD + + // (NOT USED IN PATEXPAND) + // STACK FRAME VARS USED BY SEEKMASK (CALLED BY STRETCHBITS, RGNBLT, DRAWARC, DRAWLINE) +}; diff --git a/nubus-to-ztex-gateware/blit.c b/nubus-to-ztex-gateware/blit.c index 56695d8..3be0ad8 100644 --- a/nubus-to-ztex-gateware/blit.c +++ b/nubus-to-ztex-gateware/blit.c @@ -189,24 +189,46 @@ void from_reset(void) { struct goblin_accel_regs* fbc = (struct goblin_accel_regs*)BASE_ACCEL_REGS; struct goblin_bt_regs* fbt = (struct goblin_bt_regs*)BASE_BT_REGS; unsigned int cmd = fbc->reg_r5_cmd; - unsigned char scale; + uint32_t srcx, wi, dstx; switch ((fbt->mode>>24) & 0xFF) { // mode is 8 bits wrong-endian (all fbt is wrong-endian) case mode_32bit: - scale = 2; + srcx = fbc->reg_bitblt_src_x << 2; + wi = fbc->reg_width << 2; + dstx = fbc->reg_bitblt_dst_x << 2; break; case mode_16bit: - scale = 1; + srcx = fbc->reg_bitblt_src_x << 1; + wi = fbc->reg_width << 1; + dstx = fbc->reg_bitblt_dst_x << 1; break; default: - scale = 0; + case mode_8bit: + srcx = fbc->reg_bitblt_src_x; + wi = fbc->reg_width; + dstx = fbc->reg_bitblt_dst_x; + break; + case mode_4bit: + srcx = fbc->reg_bitblt_src_x >> 1; + wi = fbc->reg_width >> 1; + dstx = fbc->reg_bitblt_dst_x >> 1; + break; + case mode_2bit: + srcx = fbc->reg_bitblt_src_x >> 2; + wi = fbc->reg_width >> 2; + dstx = fbc->reg_bitblt_dst_x >> 2; + break; + case mode_1bit: + srcx = fbc->reg_bitblt_src_x >> 3; + wi = fbc->reg_width >> 3; + dstx = fbc->reg_bitblt_dst_x >> 3; break; } switch (cmd & 0xF) { case FUN_BLIT: { - bitblit(fbc->reg_bitblt_src_x << scale, fbc->reg_bitblt_src_y, - fbc->reg_width << scale, fbc->reg_height, - fbc->reg_bitblt_dst_x << scale, fbc->reg_bitblt_dst_y, + bitblit(srcx, 
fbc->reg_bitblt_src_y, + wi , fbc->reg_height, + dstx, fbc->reg_bitblt_dst_y, 0xFF, 0x3, // GXcopy fbc->reg_src_ptr ? (unsigned char*)fbc->reg_src_ptr : (unsigned char*)BASE_FB, fbc->reg_dst_ptr ? (unsigned char*)fbc->reg_dst_ptr : (unsigned char*)BASE_FB, @@ -214,8 +236,8 @@ void from_reset(void) { fbc->reg_dst_stride); // assumed to be scaled already } break; case FUN_FILL: { - rectfill(fbc->reg_bitblt_dst_x << scale, fbc->reg_bitblt_dst_y, - fbc->reg_width << scale, fbc->reg_height, + rectfill(dstx, fbc->reg_bitblt_dst_y, + wi , fbc->reg_height, fbc->reg_fgcolor, fbc->reg_dst_ptr ? (unsigned char*)fbc->reg_dst_ptr : (unsigned char*)BASE_FB, fbc->reg_dst_stride); // assumed to be scaled already diff --git a/nubus-to-ztex-gateware/nubus_full.py b/nubus-to-ztex-gateware/nubus_full.py index e2b028d..588604b 100644 --- a/nubus-to-ztex-gateware/nubus_full.py +++ b/nubus-to-ztex-gateware/nubus_full.py @@ -164,7 +164,7 @@ class NuBus(Module): tm1_o_n.eq(1), ack_o_n.eq(1), If(wb_read.ack, - ad_oe.eq(1), + ad_oe.eq(1), ad_o_n.eq(~wb_read.dat_r), tm0_o_n.eq(0), tm1_o_n.eq(0), diff --git a/nubus-to-ztex-gateware/nubus_full_sampling.py b/nubus-to-ztex-gateware/nubus_full_sampling.py new file mode 100644 index 0000000..b87d317 --- /dev/null +++ b/nubus-to-ztex-gateware/nubus_full_sampling.py @@ -0,0 +1,445 @@ +from migen import * +from migen.genlib.fifo import * +from migen.genlib.cdc import * +from migen.fhdl.specials import Tristate + +import litex +from litex.soc.interconnect import wishbone + +class NuBus(Module): + def __init__(self, platform, wb_read, wb_write, wb_dma, cd_nubus="nubus", cd_nubus90="nubus90"): + + self.add_sources(platform) + + #led0 = platform.request("user_led", 0) + #led1 = platform.request("user_led", 1) + + nub_clk = ClockSignal(cd_nubus) + nub_resetn = ~ResetSignal(cd_nubus) + nub_clk_prev_bits = 4 # how many cycles after posedge do we still dare set some signals (i.e. 
still before setup time before negedge) + nub_clk_prev = Signal(nub_clk_prev_bits) + nub_clk_negedge = Signal() + nub_clk_posedge = Signal() + nub_clk_insetup = Signal() + self.sync += [ + nub_clk_prev[0].eq(nub_clk), + ] + self.sync += [ + nub_clk_prev[i].eq(nub_clk_prev[i-1]) for i in range(1, nub_clk_prev_bits) + ] + self.sync += [ + nub_clk_negedge.eq(~nub_clk & nub_clk_prev[0]), + nub_clk_posedge.eq( nub_clk & ~nub_clk_prev[0]), + nub_clk_insetup.eq( nub_clk & (nub_clk_prev != ((2**nub_clk_prev_bits)-1))), # if one of the previous X cycles is zero, we're early enough to set up signals + ] + + # Signals for tri-stated nubus access + # slave + tmo_oe = Signal() # output enable + tm0_i_n = Signal() + tm0_o_n = Signal() + tm1_i_n = Signal() + tm1_o_n = Signal() + ack_i_n = Signal() + ack_o_n = Signal() + + ad_oe = Signal() + ad_i_n = Signal(32) + ad_o_n = Signal(32) + + id_i_n = Signal(4) + + start_i_n = Signal() + start_o_n = Signal() # master via master_oe + + # master + rqst_oe = Signal() + rqst_i_n = Signal() + rqst_o_n = Signal() + + # sampled signals, exposing the value of the register acquired on the falling edge + # they can change every cycle *on falling edge* + # slave + sampled_tm0 = Signal() # high is byte (which byte is in ad0/ad1); low is halfword/word/block depending on ad0/ad1 + sampled_tm1 = Signal() # high is write + sampled_start = Signal() + sampled_ack = Signal() + sampled_ad = Signal(32) + + # master + sampled_rqst = Signal() + + # address rewriting + # can change every cycle *on falling edge* + processed_ad = Signal(32) + self.comb += [ + processed_ad[0:23].eq(sampled_ad[0:23]), + If(~sampled_ad[23], # first 8 MiB of slot space: remap to last 8 MiB of SDRAM + processed_ad[23:32].eq(Cat(Signal(1, reset=1), Signal(8, reset = 0x8f))), # 0x8f8... 
+ ).Else( # second 8 MiB: direct access + processed_ad[23:32].eq(Cat(sampled_ad[23], Signal(8, reset = 0xf0)))), # 24 bits, a.k.a 22 bits of words + ] + + # decoded signals, exposing decoded results from the sampled signals + # they can change every cycle *on falling edge* + # from sampling (fixme?) + decoded_sel = Signal(4) + decoded_block = Signal() + decoded_busy = Signal() + # locally evaluated + decoded_myslot = Signal() + self.comb += [ + decoded_myslot.eq( + (sampled_ad[28:32] == 0xF) & + (sampled_ad[27] == ~id_i_n[3]) & + (sampled_ad[26] == ~id_i_n[2]) & + (sampled_ad[25] == ~id_i_n[1]) & + (sampled_ad[24] == ~id_i_n[0])), + #led0.eq(decoded_block), + ] + + # current value, registered from the sampled/processed/decoded signals + # change is controlled by the FSM + current_adr = Signal(32) + current_tm0 = Signal() + current_tm1 = Signal() + current_sel = Signal(4) + current_block = Signal() + current_data = Signal(32) + + # write FIFO to speed up bus turnaround on NuBus side + write_fifo_layout = [ + ("adr", 32), + ("data", 32), + ("sel", 4), + ] + self.submodules.write_fifo = write_fifo = SyncFIFOBuffered(width=layout_len(write_fifo_layout), depth=16) + write_fifo_dout = Record(write_fifo_layout) + self.comb += write_fifo_dout.raw_bits().eq(write_fifo.dout) + write_fifo_din = Record(write_fifo_layout) + self.comb += write_fifo.din.eq(write_fifo_din.raw_bits()) + + self.sync += [ + #If((~nub_clk & nub_clk_prev[0]), # simultaneous with setting negedge + If(nub_clk_negedge, + sampled_tm0.eq(~tm0_i_n), + sampled_tm1.eq(~tm1_i_n), + sampled_start.eq(~start_i_n), + sampled_rqst.eq(~rqst_i_n), + sampled_ack.eq(~ack_i_n), + sampled_ad.eq(~ad_i_n), + ) + ] + + self.comb += [ + decoded_block.eq(sampled_ad[1] & ~sampled_ad[0] & ~sampled_tm0), # 1x block write or 1x block read + decoded_sel[3].eq(sampled_tm1 & sampled_ad[1] & sampled_ad[0] & sampled_tm0 # Byte 3 + | sampled_tm1 & sampled_ad[1] & sampled_ad[0] & ~sampled_tm0 # Half 1 + | sampled_tm1 & ~sampled_ad[1] & 
~sampled_ad[0] & ~sampled_tm0 # Word + ), + decoded_sel[2].eq(sampled_tm1 & sampled_ad[1] & ~sampled_ad[0] & sampled_tm0 # Byte 2 + | sampled_tm1 & sampled_ad[1] & sampled_ad[0] & ~sampled_tm0 # Half 1 + | sampled_tm1 & ~sampled_ad[1] & ~sampled_ad[0] & ~sampled_tm0 # Word + ), + decoded_sel[1].eq(sampled_tm1 & ~sampled_ad[1] & sampled_ad[0] & sampled_tm0 # Byte 1 + | sampled_tm1 & ~sampled_ad[1] & sampled_ad[0] & ~sampled_tm0 # Half 0 + | sampled_tm1 & ~sampled_ad[1] & ~sampled_ad[0] & ~sampled_tm0 # Word + ), + decoded_sel[0].eq(sampled_tm1 & ~sampled_ad[1] & ~sampled_ad[0] & sampled_tm0 # Byte 0 + | sampled_tm1 & ~sampled_ad[1] & sampled_ad[0] & ~sampled_tm0 # Half 0 + | sampled_tm1 & ~sampled_ad[1] & ~sampled_ad[0] & ~sampled_tm0 # Word + ), + ] + + self.read_ctr = read_ctr = Signal(32) + self.writ_ctr = writ_ctr = Signal(32) + + self.submodules.slave_fsm = slave_fsm = FSM(reset_state="Reset") + slave_fsm.act("Reset", + NextState("Idle") + ) + slave_fsm.act("Idle", + # only react to transaction start at posedge + If(nub_clk_posedge & decoded_myslot & sampled_start & ~sampled_ack & ~sampled_tm1,# & ~decoded_block, # regular read (we always send back 32 bits, so don't worry about byte/word) + NextValue(current_adr, processed_ad), + #NextValue(current_tm0, sampled_tm0), + #NextValue(current_tm1, sampled_tm1), + #NextValue(current_sel, decoded_sel), + #NextValue(current_block, decoded_block), + #If(decoded_block, + # NextValue(decoded_block_memory, 1),), + NextValue(read_ctr, read_ctr + 1), + NextState("WaitWBRead"), + ).Elif(nub_clk_posedge & decoded_myslot & sampled_start & ~sampled_ack & sampled_tm1,# & ~decoded_block, # regular write + NextValue(current_adr, processed_ad), + #NextValue(current_tm0, sampled_tm0), + #NextValue(current_tm1, sampled_tm1), + NextValue(current_sel, decoded_sel), + #NextValue(current_block, decoded_block), + #If(decoded_block, + # NextValue(decoded_block_memory, 1),), + #NextState("GetNubusWriteData"), + NextValue(writ_ctr, writ_ctr + 
1), + If(write_fifo.writable, + NextState("NubusWriteDataToFIFO"), + ).Else( + NextState("NubusWaitForFIFO"), + ) + ) + ) + slave_fsm.act("WaitWBRead", + wb_read.cyc.eq(1), + wb_read.stb.eq(1), + wb_read.we.eq(0), + wb_read.sel.eq(0xf), + wb_read.adr.eq(current_adr[2:32]), + tmo_oe.eq(1), + tm0_o_n.eq(1), + tm1_o_n.eq(1), + ack_o_n.eq(1), + If(wb_read.ack, + NextValue(current_data, wb_read.dat_r), + If(nub_clk_insetup, + ad_oe.eq(1), + ad_o_n.eq(~wb_read.dat_r), + tm0_o_n.eq(0), + tm1_o_n.eq(0), + ack_o_n.eq(0), + NextState("FinishRead"), + ).Else( + NextState("WaitBeforeFinishRead"), + ) + ) + ) + slave_fsm.act("WaitBeforeFinishRead", + tmo_oe.eq(1), + tm0_o_n.eq(1), + tm1_o_n.eq(1), + ack_o_n.eq(1), + If(nub_clk_insetup, + ad_oe.eq(1), + ad_o_n.eq(~current_data), + tm0_o_n.eq(0), + tm1_o_n.eq(0), + ack_o_n.eq(0), + NextState("FinishRead"), + ), + ) + slave_fsm.act("FinishRead", + tmo_oe.eq(1), + ad_oe.eq(1), + ad_o_n.eq(~current_data), + tm0_o_n.eq(0), + tm1_o_n.eq(0), + ack_o_n.eq(0), + #If((~nub_clk & nub_clk_prev[0]), # simultaneous with setting negedge + If(nub_clk_negedge, + NextState("ReadCleanup"), + ) + ) + slave_fsm.act("ReadCleanup", + tmo_oe.eq(1), + ad_oe.eq(1), + ad_o_n.eq(~current_data), + tm0_o_n.eq(0), + tm1_o_n.eq(0), + ack_o_n.eq(0), + NextState("Idle"), + ), + + slave_fsm.act("NubusWriteDataToFIFO", + tmo_oe.eq(1), + tm0_o_n.eq(0), + tm1_o_n.eq(0), + ack_o_n.eq(0), + #If((~nub_clk & nub_clk_prev[0]), # simultaneous with setting negedge + If(nub_clk_negedge, + write_fifo.we.eq(1), + NextState("WriteCleanup"), + ) + ) + slave_fsm.act("NubusWaitForFIFO", + tmo_oe.eq(1), + tm0_o_n.eq(1), + tm1_o_n.eq(1), + ack_o_n.eq(1), + If(nub_clk_posedge & write_fifo.writable, + NextState("NubusWriteDataToFIFO"), + ) + ) + slave_fsm.act("WriteCleanup", # extra sysclk cycle after negedge + tmo_oe.eq(1), + tm0_o_n.eq(0), + tm1_o_n.eq(0), + ack_o_n.eq(0), + NextState("Idle"), + ) + + # connect the write FIFO inputs + self.comb += [ 
write_fifo_din.adr.eq(current_adr), # recorded + write_fifo_din.data.eq(~ad_i_n), # we do it live, direct from the bus as we use it at the same time we update sampled_ad + write_fifo_din.sel.eq(current_sel), # recorded + ] + # deal with emptying the Write FIFO to the write WB + self.comb += [ wb_write.cyc.eq(write_fifo.readable), + wb_write.stb.eq(write_fifo.readable), + wb_write.we.eq(1), + wb_write.adr.eq(write_fifo_dout.adr[2:32]), + wb_write.dat_w.eq(write_fifo_dout.data), + wb_write.sel.eq(write_fifo_dout.sel), + write_fifo.re.eq(wb_write.ack), + ] + + owning_bus = Signal(reset = 0) # fixme ; theoretically one can bypass arbitration when owning the bus + + start_arbitration = Signal() + grant = Signal() + master_oe = Signal() + + nubus_sync = getattr(self.sync, cd_nubus) + nubus_sync += [ + If(sampled_rqst & ~start_arbitration, + owning_bus.eq(0), + ) + ] + + self.submodules.dma_fsm = dma_fsm = ClockDomainsRenamer(cd_nubus)(FSM(reset_state="Reset")) + dma_fsm.act("Reset", + NextState("Idle") + ) + dma_fsm.act("Idle", + If(wb_dma.cyc & wb_dma.stb & ~sampled_rqst, # we need the bus and it's not being requested + If(owning_bus, # we own the bus, skip arbitration + NextState("AdrCycle"), + ).Else( # go for arbitration + NextState("Arbitration"), + ), + ) + ) + dma_fsm.act("Arbitration", + start_arbitration.eq(1), + rqst_oe.eq(1), + rqst_o_n.eq(0), + NextState("WaitForGrant"), + ) + dma_fsm.act("WaitForGrant", + start_arbitration.eq(1), + rqst_oe.eq(1), + rqst_o_n.eq(0), + If(grant & ~decoded_busy, # I'm now 'owner' + NextValue(owning_bus, 1), + NextState("AdrCycle"), + ) + ) + dma_fsm.act("AdrCycle", + start_arbitration.eq(0), + master_oe.eq(1), # for start + tmo_oe.eq(1), # for tm0, tm1, ack + ad_oe.eq(1), # for write address + start_o_n.eq(0), + tm0_o_n.eq(~((wb_dma.sel == 0x1) | (wb_dma.sel == 0x2) | (wb_dma.sel == 0x4) | (wb_dma.sel == 0x8))), # byte only + tm1_o_n.eq(~wb_dma.we), + ad_o_n[0].eq(~((wb_dma.sel == 0x2) | (wb_dma.sel == 0x3) | (wb_dma.sel == 0x8) 
| (wb_dma.sel == 0xc))), # odd bytes, both half-words + ad_o_n[1].eq(~((wb_dma.sel == 0x4) | (wb_dma.sel == 0x8) | (wb_dma.sel == 0xc))), # upper bytes and half-word + ad_o_n[2:32].eq(~wb_dma.adr), + ack_o_n.eq(1), + If(wb_dma.we, + NextState("DatCycle"), + ).Else( + NextState("ReadWaitForAck"), + ) + ) + dma_fsm.act("DatCycle", + master_oe.eq(1), # for start + ad_oe.eq(1), # for write data + start_o_n.eq(1), # start finished, but still need to be driven + ad_o_n.eq(~wb_dma.dat_w), + If(sampled_ack, + wb_dma.ack.eq(1), + # fixme: check status ??? (tm0 and tm1 should be active for no-error) + NextState("FinishCycle"), + ) + ) + dma_fsm.act("FinishCycle", + master_oe.eq(1), # for start + start_o_n.eq(1), # start finished, but still need to be driven + tmo_oe.eq(1), # for tm0, tm1, ack, need to be driven to inactive + tm0_o_n.eq(1), + tm1_o_n.eq(1), + ack_o_n.eq(1), + NextState("Idle"), + ) + dma_fsm.act("ReadWaitForAck", + master_oe.eq(1), # for start + start_o_n.eq(1), # start finished, but still need to be driven + wb_dma.dat_r.eq(sampled_ad), + If(sampled_ack, + wb_dma.ack.eq(1), + # fixme: check status ??? 
(tm0 and tm1 should be active for no-error) + NextState("FinishCycle"), + ) + ) + + # stuff at this end so we don't use the signals inadvertently + + # real NuBus signals + nub_tm0n = platform.request("tm0_3v3_n") + nub_tm1n = platform.request("tm1_3v3_n") + nub_startn = platform.request("start_3v3_n") + nub_ackn = platform.request("ack_3v3_n") + nub_adn = platform.request("ad_3v3_n") + nub_idn = platform.request("id_3v3_n") + + # Tri-state + self.specials += Tristate(nub_tm0n, tm0_o_n, tmo_oe, tm0_i_n) + self.specials += Tristate(nub_tm1n, tm1_o_n, tmo_oe, tm1_i_n) + self.specials += Tristate(nub_ackn, ack_o_n, tmo_oe, ack_i_n) + self.specials += Tristate(nub_adn, ad_o_n, ad_oe, ad_i_n) + self.specials += Tristate(nub_startn, start_o_n, master_oe, start_i_n) + self.comb += [ + id_i_n.eq(nub_idn), + ] + + # NubusFPGA-only signals + nf_tmoen = platform.request("tmoen") + nf_nubus_ad_dir = platform.request("nubus_ad_dir") + + self.comb += [ + nf_tmoen.eq(~tmo_oe), + nf_nubus_ad_dir.eq(~ad_oe), + ] + + # real Nubus signal, for master + nub_rqstn = platform.request("rqst_3v3_n") + + # Tri-state + self.specials += Tristate(nub_rqstn, rqst_o_n, rqst_oe, rqst_i_n) + + # NubusFPGA-only signals, for master + nub_arbcy_n = platform.request("arbcy_n") + nf_grant = platform.request("grant") + nf_nubus_master_dir = platform.request("nubus_master_dir") + nf_fpga_to_cpld_signal = platform.request("fpga_to_cpld_signal") + + # NuBus90 signals, for completeness + nub_clk2xn = ClockSignal(cd_nubus90) + nub_tm2n = platform.request("tm2_3v3_n") + + self.comb += [ + nf_nubus_master_dir.eq(master_oe), + nub_arbcy_n.eq(~start_arbitration), + grant.eq(nf_grant), + nf_fpga_to_cpld_signal.eq(~rqst_oe), + ] + + self.sync += [ + If((~nub_clk & nub_clk_prev[0]), # simultaneous with setting negedge + decoded_busy.eq(~decoded_busy & nub_ackn & ~nub_startn # beginning of transaction + | decoded_busy & nub_ackn & nub_resetn), # hold during cycle + ) + ] + + + def add_sources(self, platform): + # 
sampling of data on falling edge of clock, done in verilog + platform.add_source("nubus_sampling.v", "verilog") diff --git a/nubus-to-ztex-gateware/nubus_to_fpga_soc.py b/nubus-to-ztex-gateware/nubus_to_fpga_soc.py index fae2bdc..d34fa8a 100644 --- a/nubus-to-ztex-gateware/nubus_to_fpga_soc.py +++ b/nubus-to-ztex-gateware/nubus_to_fpga_soc.py @@ -18,6 +18,7 @@ import nubus_to_fpga_export import nubus import nubus_full +import nubus_full_sampling import nubus_stat from litedram.modules import MT41J128M16 @@ -317,15 +318,14 @@ class NuBusFPGA(SoCCore): self.bus.add_slave("DMA", self.wishbone_slave_sys, SoCRegion(origin=self.mem_map.get("master", None), size=0x40000000, cached=False)) else: wishbone_master_sys = wishbone.Interface(data_width=self.bus.data_width) - self.submodules.wishbone_master_nubus = WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_master_sys, cd_master="nubus", cd_slave="sys") nubus_writemaster_sys = wishbone.Interface(data_width=self.bus.data_width) wishbone_slave_nubus = wishbone.Interface(data_width=self.bus.data_width) self.submodules.wishbone_slave_sys = WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_slave_nubus, cd_master="sys", cd_slave="nubus", force_delay=6) # force delay needed to avoid back-to-back transaction running into issue https://github.com/alexforencich/verilog-wishbone/issues/4 - self.submodules.nubus = nubus_full.NuBus(platform=platform, - wb_read=self.wishbone_master_nubus, - wb_write=nubus_writemaster_sys, - wb_dma=wishbone_slave_nubus, - cd_nubus="nubus") + self.submodules.nubus = nubus_full_sampling.NuBus(platform=platform, + wb_read=wishbone_master_sys, + wb_write=nubus_writemaster_sys, + wb_dma=wishbone_slave_nubus, + cd_nubus="nubus") self.bus.add_master(name="NuBusBridgeToWishbone", master=wishbone_master_sys) self.bus.add_slave("DMA", self.wishbone_slave_sys, SoCRegion(origin=self.mem_map.get("master", None), size=0x40000000, cached=False)) 
self.bus.add_master(name="NuBusBridgeToWishboneWrite", master=nubus_writemaster_sys) diff --git a/nubus-to-ztex-gateware/slave_tb.sv b/nubus-to-ztex-gateware/slave_tb.sv index bb7b18b..76762b3 100644 --- a/nubus-to-ztex-gateware/slave_tb.sv +++ b/nubus-to-ztex-gateware/slave_tb.sv @@ -487,19 +487,19 @@ module nubus_slave_tb (); always begin tst_clkn <= 1; - #75; + #75.075; tst_clkn <= 0; if (DEBUG_NUBUS_START) begin if (~nub_startn) $display ("%g (NuBus Start) /ad: $%h {/tmadn}: %b%b%b%b", $time, nub_adn, nub_tm1n, nub_tm0n, nub_adn[1], nub_adn[0]); end - #25; + #25.025; end always begin tst_clk2xn <= 0; - #25; + #25.025; tst_clk2xn <= 1; - #25; + #25.025; end always begin