mirror of
https://github.com/rdolbeau/NuBusFPGA.git
synced 2024-12-21 19:29:21 +00:00
LD/LDU/SD (64-bits, dual regs) support in Vex + accel ; ramdisk tested in Q650
This commit is contained in:
parent
2d2cbdbafe
commit
173c87ea02
1
nubus-to-ztex-gateware/.gitignore
vendored
1
nubus-to-ztex-gateware/.gitignore
vendored
@ -11,3 +11,4 @@ blit.raw
|
||||
blit.s
|
||||
*.patch
|
||||
OLD
|
||||
nubusfpga_csr_*.h
|
||||
|
7
nubus-to-ztex-gateware/DeclROM/.gitignore
vendored
Normal file
7
nubus-to-ztex-gateware/DeclROM/.gitignore
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
vid_decl_rom.bin
|
||||
vid_decl_rom.dir
|
||||
vid_decl_rom.l
|
||||
vid_decl_rom.o
|
||||
vid_decl_rom.raw
|
||||
vid_decl_rom.srec
|
||||
*.bin
|
@ -93,6 +93,7 @@ vid_decl_rom.dir: vid_decl_rom.raw append_romdir
|
||||
|
||||
vid_decl_rom.bin: vid_decl_rom.dir
|
||||
${NUBUS_CHECKSUM} --input_file $< --output_file $@ --output_size 32768
|
||||
dd if=dump.cpr of=vid_decl_rom.bin bs=1 conv=notrunc
|
||||
|
||||
clean:
|
||||
rm -f res.inc ${CSRC_ASM} *.o vid_decl_rom.srec vid_decl_rom.raw vid_decl_rom.dir vid_decl_rom.l
|
||||
|
@ -17,7 +17,9 @@
|
||||
#warning "Using default VRES"
|
||||
#endif
|
||||
|
||||
#define GOBOFB_BASE 0x00900000
|
||||
#define GOBOFB_BASE 0x00900000
|
||||
#define GOBOFB_ACCEL 0x00901000
|
||||
#define GOBOFB_ACCEL_LE 0x00901800
|
||||
|
||||
//#define GOBOFB_REG_BASE 0x00900000
|
||||
//#define GOBOFB_MEM_BASE 0x00000000 /* remapped to 0x8f800000 by HW */
|
||||
@ -44,6 +46,34 @@
|
||||
#define GOBOFB_MODE_24BIT 0x10
|
||||
#define GOBOFB_MODE_15BIT 0x11
|
||||
|
||||
#define u_int32_t volatile unsigned long
|
||||
/*
 * Register block of the Goblin accelerator. The driver overlays this struct
 * on dCtlDevBase + GOBOFB_ACCEL_LE (see the fill sequence in cNuBusFPGACtl).
 * Every field is a u_int32_t, which this file #defines as
 * `volatile unsigned long`. The trailing numbers are field indices
 * (0 = first field), marked every fourth field.
 */
struct goblin_accel_regs {
|
||||
u_int32_t reg_status; // 0 ; bit WORK_IN_PROGRESS_BIT is polled while the engine is busy
|
||||
u_int32_t reg_cmd; // written with (1 << DO_*_BIT) to start an operation
|
||||
u_int32_t reg_r5_cmd;
|
||||
u_int32_t resv0;
|
||||
u_int32_t reg_width; // 4 ; the driver writes HRES here ("pixels")
|
||||
u_int32_t reg_height;
|
||||
u_int32_t reg_fgcolor;
|
||||
u_int32_t resv2;
|
||||
u_int32_t reg_bitblt_src_x; // 8
|
||||
u_int32_t reg_bitblt_src_y;
|
||||
u_int32_t reg_bitblt_dst_x;
|
||||
u_int32_t reg_bitblt_dst_y;
|
||||
u_int32_t reg_src_stride; // 12
|
||||
u_int32_t reg_dst_stride;
|
||||
u_int32_t reg_src_ptr; // 14
|
||||
u_int32_t reg_dst_ptr;
|
||||
};
|
||||
|
||||
// status
|
||||
#define WORK_IN_PROGRESS_BIT 0
|
||||
|
||||
// cmd
|
||||
#define DO_BLIT_BIT 0 // hardwired in goblin_accel.py
|
||||
#define DO_FILL_BIT 1 // hardwired in goblin_accel.py
|
||||
#define DO_TEST_BIT 3 // hardwired in goblin_accel.py
|
||||
|
||||
struct MyGammaTbl {
|
||||
short gVersion; /*gamma version number*/
|
||||
short gType; /*gamma data type*/
|
||||
|
@ -276,10 +276,12 @@ OSErr cNuBusFPGACtl(CntrlParamPtr pb, /* DCtlPtr */ AuxDCEPtr dce)
|
||||
UInt32 a32_4p0, a32_4p1;
|
||||
const uint32_t wb = HRES >> idx;
|
||||
unsigned short j, i;
|
||||
|
||||
if (vPInfo->csPage != 0)
|
||||
return paramErr;
|
||||
|
||||
SwapMMUMode ( &busMode );
|
||||
#if 0
|
||||
if ((dStore->curMode != kDepthMode5) && (dStore->curMode != kDepthMode6)) {
|
||||
/* grey the screen */
|
||||
a32_l0 = a32;
|
||||
@ -313,6 +315,25 @@ OSErr cNuBusFPGACtl(CntrlParamPtr pb, /* DCtlPtr */ AuxDCEPtr dce)
|
||||
a32_l1 += 2*HRES*4;
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
#define WAIT_FOR_HW_LE(accel_le) \
|
||||
while (accel_le->reg_status & (1<<WORK_IN_PROGRESS_BIT))
|
||||
const UInt32 fgcolor = 0; // FIXME: per-depth?
|
||||
struct goblin_accel_regs* accel_le = (struct goblin_accel_regs*)(dce->dCtlDevBase+GOBOFB_ACCEL_LE);
|
||||
WAIT_FOR_HW_LE(accel_le);
|
||||
accel_le->reg_width = HRES; // pixels
|
||||
accel_le->reg_height = VRES;
|
||||
accel_le->reg_bitblt_dst_x = 0; // pixels
|
||||
accel_le->reg_bitblt_dst_y = 0;
|
||||
accel_le->reg_dst_ptr = 0;
|
||||
accel_le->reg_fgcolor = fgcolor;
|
||||
accel_le->reg_cmd = (1<<DO_FILL_BIT);
|
||||
WAIT_FOR_HW_LE(accel_le);
|
||||
|
||||
#undef WAIT_FOR_HW_LE
|
||||
|
||||
#endif
|
||||
SwapMMUMode ( &busMode );
|
||||
|
||||
ret = noErr;
|
||||
|
@ -24,6 +24,7 @@ UInt32 Primary(SEBlock* seblock) {
|
||||
/* PRIM_WRITEREG(GOBOFB_DEBUG, busMode);// trace */
|
||||
|
||||
/* grey the screen */
|
||||
/* should switch to HW ? */
|
||||
a32_l0 = a32;
|
||||
a32_l1 = a32 + HRES;
|
||||
for (j = 0 ; j < VRES ; j+= 2) {
|
||||
|
@ -12,11 +12,6 @@
|
||||
|
||||
/*
 * Per-driver context of the RAM disk. cNuBusFPGARAMDskOpen allocates one
 * (zero-filled, via NewHandleSysClear) and stores the handle in
 * dce->dCtlStorage.
 */
struct RAMDrvContext {
|
||||
DrvSts2 drvsts; // its qLink is what cNuBusFPGARAMDskOpen passes to MyAddDrive as the drive-queue element
|
||||
//Ptr origcopyfunc;
|
||||
//Ptr origdisk; /* keep unstripped pointers for Dispose*/
|
||||
//unsigned char * disk;
|
||||
//char initialized;
|
||||
//char alreadyalloced;
|
||||
};
|
||||
|
||||
#define DRIVE_SIZE_BYTES ((256ul-8ul)*1024ul*1024ul) // FIXME: mem size minus fb size
|
||||
|
@ -6,22 +6,27 @@
|
||||
OSErr cNuBusFPGARAMDskOpen(IOParamPtr pb, /* DCtlPtr */ AuxDCEPtr dce)
|
||||
{
|
||||
DrvSts2 *dsptr; // pointer to the DrvSts2 in our context
|
||||
DrvQElPtr dq;
|
||||
int drvnum = 1;
|
||||
struct RAMDrvContext *ctx;
|
||||
OSErr ret = noErr;
|
||||
char busMode;
|
||||
|
||||
busMode = 1;
|
||||
SwapMMUMode ( &busMode ); // to32 // this likely won't work on older MacII ???
|
||||
|
||||
dce->dCtlDevBase = 0xfc000000;
|
||||
dce->dCtlDevBase = 0xfc000000; // FIXME: why do we not get our slot properly ?
|
||||
|
||||
write_reg(dce, GOBOFB_DEBUG, 0xDEAD0000);
|
||||
/* write_reg(dce, GOBOFB_DEBUG, dce->dCtlRefNum); */
|
||||
|
||||
if (dce->dCtlStorage == nil) {
|
||||
DrvQElPtr dq;
|
||||
for(dq = (DrvQElPtr)(GetDrvQHdr())->qHead; dq; dq = (DrvQElPtr)dq->qLink) {
|
||||
if (dq->dQDrive >= drvnum)
|
||||
drvnum = dq->dQDrive+1;
|
||||
}
|
||||
|
||||
ReserveMemSys(sizeof(struct RAMDrvContext));
|
||||
dce->dCtlStorage = NewHandleSysClear(sizeof(struct RAMDrvContext));
|
||||
if (dce->dCtlStorage == nil) {
|
||||
ret = openErr;
|
||||
@ -65,7 +70,7 @@ OSErr cNuBusFPGARAMDskOpen(IOParamPtr pb, /* DCtlPtr */ AuxDCEPtr dce)
|
||||
write_reg(dce, GOBOFB_DEBUG, compressed[2]);
|
||||
write_reg(dce, GOBOFB_DEBUG, compressed[3]);
|
||||
*/
|
||||
res = rledec(superslot, compressed, 730);
|
||||
res = rledec(superslot, compressed, 730); // FIXME: 730 = 2920/4 (compressed size in words)
|
||||
/*
|
||||
write_reg(dce, GOBOFB_DEBUG, res);
|
||||
write_reg(dce, GOBOFB_DEBUG, 0xDEEEEEAD);
|
||||
@ -76,6 +81,7 @@ OSErr cNuBusFPGARAMDskOpen(IOParamPtr pb, /* DCtlPtr */ AuxDCEPtr dce)
|
||||
MyAddDrive(dsptr->dQRefNum, drvnum, (DrvQElPtr)&dsptr->qLink);
|
||||
}
|
||||
|
||||
SwapMMUMode ( &busMode );
|
||||
|
||||
done:
|
||||
return ret;
|
||||
|
@ -4,77 +4,82 @@
|
||||
|
||||
#ifndef SKIP_MAIN
|
||||
/*
 * Run-length encode `len` 32-bit words from `in` into `out`.
 *
 * Stream format (the one rledec consumes): a sequence of records, each
 * starting with a signed 32-bit count stored through __builtin_bswap32():
 *   count >= 0 : ONE data word follows, standing for (count + 1) copies;
 *   count <  0 : (1 - count) literal data words follow.
 * Data words are copied verbatim (never byte-swapped).
 *
 * Returns the number of words written to `out`. A record never costs more
 * than twice the input words it covers, so `out` needs room for at most
 * 2 * len words. An empty input produces an empty stream.
 */
uint32_t rleenc(uint32_t* out, const uint32_t* in, const uint32_t len) {
  uint32_t i, k;
  uint32_t j = 0;   /* write index into out */
  uint32_t p;       /* most recent input word, not yet emitted */
  uint32_t ib = 0;  /* index of the first word of the pending literal run */
  /* c == 0: only p is pending
   * c  > 0: p has been seen (c + 1) times in a row
   * c  < 0: (-c) literal words starting at in[ib] are pending, followed by p */
  int32_t c = 0;

  if (len == 0)
    return 0;

  p = in[0];

  for (i = 1 ; i < len ; i++) {
    if (c == 0) { // just started
      if (in[i] == p) { // repeat
        c++;
      } else { // non-repeat
        p = in[i];
        c--;
        ib = i - 1;
      }
    } else if (c > 0) { // in-repeat
      if (in[i] == p) { // keep repeating
        c++;
      } else { // exit repeat
        out[j++] = __builtin_bswap32((uint32_t)c); // write result
        out[j++] = p;
        p = in[i]; // restart
        c = 0;
      }
    } else { // c < 0, in a literal run
      if (in[i] == p) { // exit non-repeat
        // emit the (-c) literals that precede p; p itself starts the new repeat
        out[j++] = __builtin_bswap32((uint32_t)(c + 1));
        for (k = 0 ; k < (uint32_t)(-c) ; k++)
          out[j++] = in[ib+k];
        c = 1; // this and previous
      } else { // non-repeat
        p = in[i];
        c--;
      }
    }
  }

  /* flush whatever is still pending */
  if (c >= 0) {
    out[j++] = __builtin_bswap32((uint32_t)c);
    out[j++] = p;
  } else {
    /* Pending literals are in[ib .. len-1]: (-c) older words plus p, i.e.
     * (1 - c) words, which is exactly what a stored count of c means to
     * the decoder. Emitting only p here would make the decoder read past
     * the end of the stream. */
    out[j++] = __builtin_bswap32((uint32_t)c);
    for (k = 0 ; k < (uint32_t)(-c) + 1u ; k++)
      out[j++] = in[ib+k];
  }

  return j;
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Decode a run-length stream of `len` 32-bit words from `in` into `out`.
 *
 * Each record starts with a signed 32-bit count. It is stored byte-swapped
 * relative to little-endian hosts, so it is read as-is when compiled for
 * __m68k__ and through __builtin_bswap32() everywhere else:
 *   count >= 0 : the next word is replicated (count + 1) times;
 *   count <  0 : the next (1 - count) words are copied verbatim.
 *
 * Repeat runs of 300001 words or more are NOT fully written: only the first
 * four and last four words of the run are stored and the output index skips
 * the rest. The original comment says this assumes the run is padding, so
 * the destination presumably already holds acceptable content there.
 *
 * Returns the number of output words produced (skipped words included).
 * Decoding stops early, returning the count so far, when a record would
 * extend past `len`. The caller must size `out` for the decoded data; that
 * cannot be checked here.
 */
uint32_t rledec(uint32_t* out, const uint32_t* in, const uint32_t len) {
  uint32_t i = 0, j = 0, k;

  while (i < len) {
#ifndef __m68k__
    const int32_t c = (int32_t)__builtin_bswap32(in[i]);
#else
    const int32_t c = (int32_t)(in[i]);
#endif
    if (c >= 0) {
      const uint32_t run = (uint32_t)c + 1u;
      uint32_t value;

      if ((len - i) < 2u) // count word without its data word
        break;
      value = in[i+1];

      if (c < 300000) { // !!!!!!!!!!!!!!!!!!!!!!!!!!
        for (k = 0 ; k < run ; k++)
          out[j++] = value;
      } else { // do a small subset at the beginning and end instead of the full range and assume this is padding otherwise
        for (k = 0 ; k < 4 ; k++)
          out[j+k] = value;
        for (k = run - 4u ; k < run ; k++)
          out[j+k] = value;
        j += run;
      }
      i += 2;
    } else {
      /* 1 + -c, computed in unsigned arithmetic so INT32_MIN cannot overflow */
      const uint32_t run = 1u + (0u - (uint32_t)c);

      if (run > (len - i - 1u)) // literals would run past the stream
        break;
      for (k = 0 ; k < run ; k++)
        out[j++] = in[i+1+k];
      i += 1u + run;
    }
  }

  return j;
}
|
||||
|
||||
#ifndef SKIP_MAIN
|
||||
@ -87,58 +92,58 @@ uint32_t rledec(uint32_t* out, const uint32_t* in, const uint32_t len) {
|
||||
#include <unistd.h>
|
||||
|
||||
/*
 * Write `bytes` bytes from `buf` to `path`, creating the file or truncating
 * an existing one so no stale tail survives. Returns 0 on success, -1 on
 * error (after reporting it on stderr).
 */
static int write_dump(const char* path, const void* buf, size_t bytes) {
  const char* p = buf;
  size_t done = 0;
  int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);

  if (fd < 0) {
    perror(path);
    return -1;
  }
  while (done < bytes) {
    ssize_t w = write(fd, p + done, bytes - done);
    if (w <= 0) {
      perror(path);
      close(fd);
      return -1;
    }
    done += (size_t)w;
  }
  if (close(fd) != 0) {
    perror(path);
    return -1;
  }
  return 0;
}

/*
 * Host-side tool: reads up to 248 MiB from "dump.raw", RLE-compresses it,
 * writes the stream to "dump.cpr" and as a C array to "dump_cpr.c", then
 * decodes it again into "dump.ucp" so the round trip can be compared.
 * Returns 0 on success and 1 on any allocation or I/O failure.
 */
int main(int argc, char** argv) {
  const size_t max_in_bytes = 248ul*1024ul*1024ul;
  const size_t bufa_words = 256ul*1024ul*1024ul/sizeof(uint32_t);
  /* a record costs at most two words per input word, see the encoder */
  const size_t bufb_words = 2 * (max_in_bytes/sizeof(uint32_t));
  size_t total = 0;
  int fd;
  uint32_t len, k;
  uint32_t *bufa, *bufb;
  FILE* f;

  (void)argc;
  (void)argv;

  bufa = calloc(bufa_words, sizeof(uint32_t));
  bufb = calloc(bufb_words, sizeof(uint32_t));
  if (bufa == NULL || bufb == NULL) {
    perror("calloc");
    return 1;
  }

  fd = open("dump.raw", O_RDONLY);
  if (fd < 0) {
    perror("dump.raw");
    return 1;
  }
  /* read() may return short counts; loop until EOF or the size limit */
  while (total < max_in_bytes) {
    ssize_t got = read(fd, (char*)bufa + total, max_in_bytes - total);
    if (got < 0) {
      perror("dump.raw");
      close(fd);
      return 1;
    }
    if (got == 0)
      break;
    total += (size_t)got;
  }
  close(fd);

  len = (uint32_t)(total / 4);
  if (len == 0) {
    fprintf(stderr, "dump.raw: no data to compress\n");
    return 1;
  }

  printf("File : %u bytes\n", (unsigned)(len*4));

  len = rleenc(bufb, bufa, len);

  printf("Compressed : %u bytes\n", (unsigned)(len*4));

  if (write_dump("dump.cpr", bufb, (size_t)len*4) != 0)
    return 1;

  f = fopen("dump_cpr.c", "w");
  if (f == NULL) {
    perror("dump_cpr.c");
    return 1;
  }
  /* NOTE(review): this emits an array of `unsigned long*`, not of
   * `unsigned long`; kept as-is because the consumer's declaration is not
   * visible here - confirm and fix both sides together. */
  fprintf(f, "unsigned long* compressed[%u] = {\n", (unsigned)len);
  for (k = 0 ; k < len ; k++) {
    fprintf(f, "0x%08x%s", (unsigned)bufb[k],
            k == (len-1) ? "};" : (k%8 == 7 ? ",\n" : ",")
            );
  }
  if (fclose(f) != 0) {
    perror("dump_cpr.c");
    return 1;
  }

  len = rledec(bufa, bufb, len);

  printf("Uncompressed : %u bytes\n", (unsigned)(len*4));

  if (write_dump("dump.ucp", bufa, (size_t)len*4) != 0)
    return 1;

  free(bufb);
  free(bufa);

  return 0;
}
|
||||
|
||||
#endif
|
||||
|
@ -19,12 +19,12 @@ object GenGoblinAccel { // extends App {
|
||||
val config = VexRiscvConfig(
|
||||
plugins = List(
|
||||
new IBusCachedPlugin(
|
||||
resetVector = 0x70910000, // beginning of ROM
|
||||
resetVector = 0xF0910000l, // beginning of ROM
|
||||
relaxedPcCalculation = false,
|
||||
prediction = STATIC,
|
||||
config = InstructionCacheConfig(
|
||||
cacheSize = 512,
|
||||
bytePerLine = 32,
|
||||
cacheSize = 256,
|
||||
bytePerLine = 16,
|
||||
wayCount = 1,
|
||||
addressWidth = 32,
|
||||
cpuDataWidth = 32,
|
||||
@ -42,8 +42,8 @@ object GenGoblinAccel { // extends App {
|
||||
// ),
|
||||
new DBusCachedPlugin(
|
||||
config = new DataCacheConfig(
|
||||
cacheSize = 512,
|
||||
bytePerLine = 32,
|
||||
cacheSize = 256,
|
||||
bytePerLine = 16,
|
||||
wayCount = 2,
|
||||
addressWidth = 32,
|
||||
cpuDataWidth = 128,
|
||||
@ -51,7 +51,7 @@ object GenGoblinAccel { // extends App {
|
||||
catchAccessError = false,
|
||||
catchIllegal = false,
|
||||
catchUnaligned = false,
|
||||
pendingMax = 8, // 64
|
||||
pendingMax = 8, // 64 ; irrelevant? only for SMP?
|
||||
withWriteAggregation = true // required if memDataWidth > 32
|
||||
),
|
||||
dBusCmdMasterPipe = false, // prohibited if memDataWidth > 32
|
||||
@ -68,8 +68,8 @@ object GenGoblinAccel { // extends App {
|
||||
new DecoderSimplePlugin(
|
||||
catchIllegalInstruction = false
|
||||
),
|
||||
new RegFilePlugin(
|
||||
regFileReadyKind = plugin.SYNC,
|
||||
new RegFileOddEvenPlugin(
|
||||
regFileReadyKind = plugin.ASYNC, // FIXME why is even-odd failing with SYNC??? (and what's the difference...)
|
||||
zeroBoot = false
|
||||
),
|
||||
new IntAluPlugin,
|
||||
@ -83,7 +83,7 @@ object GenGoblinAccel { // extends App {
|
||||
//new BitManipZbaPlugin(earlyInjection = false), // sh.add
|
||||
//new BitManipZbbPlugin(earlyInjection = false), // zero-ext, min/max, others
|
||||
//new BitManipZbtPlugin(earlyInjection = false), // cmov, cmix, funnel
|
||||
new CG6Plugin(earlyInjection = false),
|
||||
new CG6Plugin(earlyInjection = false), // full-custom list
|
||||
new HazardSimplePlugin(
|
||||
bypassExecute = true,
|
||||
bypassMemory = true,
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -81,6 +81,8 @@ struct goblin_accel_regs {
|
||||
|
||||
//#include "./rvintrin.h"
|
||||
|
||||
#include "ldsdsupport.h"
|
||||
|
||||
void from_reset(void) __attribute__ ((noreturn)); // nothrow,
|
||||
|
||||
static inline void flush_cache(void) {
|
||||
@ -376,6 +378,17 @@ static void rectfill(const unsigned_param_type xd,
|
||||
}
|
||||
if (wi > 3) {
|
||||
unsigned int u32color = (unsigned int)u8color | ((unsigned int)u8color)<<8 | ((unsigned int)u8color)<<16 | ((unsigned int)u8color)<<24;
|
||||
if ((wi>15) && (((unsigned int)dptr_elt&0x7)==0)) {
|
||||
register unsigned int s8 asm("s8");
|
||||
register unsigned int s9 asm("s9");
|
||||
s8 = u32color;
|
||||
s9 = u32color;
|
||||
for ( ; i < (wi-15) ; i+=16) {
|
||||
sd(dptr_elt, 0, 0, s8, s9);
|
||||
sd(dptr_elt, 8, 0, s8, s9);
|
||||
dptr_elt += 16;
|
||||
}
|
||||
}
|
||||
for ( ; i < (wi-3) ; i+=4) {
|
||||
*(unsigned int*)dptr_elt = u32color;
|
||||
dptr_elt +=4;
|
||||
@ -732,10 +745,139 @@ static void invert(const unsigned_param_type xd,
|
||||
BLIT_FWD_FWD(NAME, OP) \
|
||||
BLIT_FWD_BWD(NAME, OP) \
|
||||
BLIT_BWD_FWD(NAME, OP) \
|
||||
|
||||
#define BLIT_NOTALLDIR(NAME, OP) \
|
||||
BLIT_FWD_BWD(NAME, OP) \
|
||||
BLIT_BWD_FWD(NAME, OP) \
|
||||
|
||||
|
||||
BLIT_ALLDIR(copy, COPY)
|
||||
//BLIT_ALLDIR(copy, COPY)
|
||||
BLIT_NOTALLDIR(copy, COPY)
|
||||
BLIT_ALLDIR(xor, XOR)
|
||||
BLIT_ALLDIR(copy_pm, COPY_PM)
|
||||
BLIT_ALLDIR(xor_pm, XOR_PM)
|
||||
|
||||
|
||||
static void bitblit_fwd_fwd_copy(const unsigned_param_type xs,
|
||||
const unsigned_param_type ys,
|
||||
const unsigned_param_type wi,
|
||||
const unsigned_param_type re,
|
||||
const unsigned_param_type xd,
|
||||
const unsigned_param_type yd,
|
||||
const unsigned char pm,
|
||||
unsigned char* src_ptr,
|
||||
unsigned char* dst_ptr,
|
||||
const unsigned_param_type src_stride,
|
||||
const unsigned_param_type dst_stride) {
|
||||
unsigned int j;
|
||||
unsigned char *sptr = (src_ptr + (ys * src_stride) + xs);
|
||||
unsigned char *dptr = (dst_ptr + (yd * dst_stride) + xd);
|
||||
unsigned char *sptr_line = sptr;
|
||||
unsigned char *dptr_line = dptr;
|
||||
/*const unsigned char npm = ~pm;*/
|
||||
|
||||
for (j = 0 ; j < re ; j++) {
|
||||
register unsigned char *sptr_elt = sptr_line;
|
||||
unsigned char *dptr_elt = dptr_line;
|
||||
const unsigned char *dptr_elt_last = dptr_line + wi;
|
||||
if (wi>3) {
|
||||
if ((xs & 0x3) != (xd & 0x3)) {
|
||||
/* align dest, we'll deal with src via shift realignement using fsr */
|
||||
for ( ; (dptr_elt < dptr_elt_last) && ((unsigned int)dptr_elt&0x3)!=0; ) {
|
||||
dptr_elt[0] = sptr_elt[0];
|
||||
dptr_elt ++;
|
||||
sptr_elt ++;
|
||||
}
|
||||
unsigned char *sptr_elt_al = (unsigned char*)((unsigned int)sptr_elt & ~0x3);
|
||||
unsigned int fsr_cst = 8*((unsigned int)sptr_elt & 0x3);
|
||||
unsigned int src0 = ((unsigned int*)sptr_elt_al)[0];
|
||||
unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24;
|
||||
/* handle unaligned src */
|
||||
for ( ; (dptr_elt < (dptr_elt_last-3)) ; ) {
|
||||
unsigned int src1 = ((unsigned int*)sptr_elt_al)[1];
|
||||
unsigned int val;
|
||||
asm("fsr %0, %1, %2, %3\n" : "=r"(val) : "r"(src0), "r"(src1), "r"(fsr_cst));
|
||||
((unsigned int*)dptr_elt)[0] = val;
|
||||
src0 = src1;
|
||||
dptr_elt += 4;
|
||||
sptr_elt_al += 4;
|
||||
}
|
||||
sptr_elt = sptr_elt_al + ((unsigned int)sptr_elt & 0x3);
|
||||
} else {
|
||||
const unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24;
|
||||
const unsigned char* dptr_elt_end = dptr_elt + wi;
|
||||
/* align dest & src (they are aligned the same here) */
|
||||
for ( ; (dptr_elt < dptr_elt_last) && ((unsigned int)dptr_elt&0x3)!=0; ) {
|
||||
dptr_elt[0] = sptr_elt[0];
|
||||
dptr_elt ++;
|
||||
sptr_elt ++;
|
||||
}
|
||||
/* align to 8 for ls/sd */
|
||||
for ( ; (dptr_elt < (dptr_elt_last-3)) && ((unsigned int)dptr_elt&0x7)!=0;) {
|
||||
((unsigned int*)dptr_elt)[0] = ((unsigned int*)sptr_elt)[0];
|
||||
dptr_elt += 4;
|
||||
sptr_elt += 4;
|
||||
}
|
||||
#if 0
|
||||
for ( ; (dptr_elt < (dptr_elt_last-31)) ; ) {
|
||||
register unsigned int s4 asm("s4");
|
||||
register unsigned int s5 asm("s5");
|
||||
register unsigned int s6 asm("s6");
|
||||
register unsigned int s7 asm("s7");
|
||||
register unsigned int s8 asm("s8");
|
||||
register unsigned int s9 asm("s9");
|
||||
register unsigned int s10 asm("s10");
|
||||
register unsigned int s11 asm("s11");
|
||||
ld(sptr_elt, 0, s4, s5);
|
||||
ld(sptr_elt, 16, s8, s9);
|
||||
|
||||
ld(sptr_elt, 8, s6, s7);
|
||||
sd(dptr_elt, 0, 0, s4, s5);
|
||||
sd(dptr_elt, 8, 0, s6, s7);
|
||||
|
||||
ld(sptr_elt, 24, s10, s11);
|
||||
sd(dptr_elt, 16, 0, s8, s9);
|
||||
sptr_elt += 32;
|
||||
sd(dptr_elt, 24, 0, s10, s11);
|
||||
dptr_elt += 32;
|
||||
|
||||
}
|
||||
#endif
|
||||
for ( ; (dptr_elt < (dptr_elt_last-15)) ; ) {
|
||||
register unsigned int s8 asm("s8");
|
||||
register unsigned int s9 asm("s9");
|
||||
register unsigned int s10 asm("s10");
|
||||
register unsigned int s11 asm("s11");
|
||||
ld(sptr_elt, 0, s8, s9);
|
||||
ld(sptr_elt, 8, s10, s11);
|
||||
sd(dptr_elt, 0, 0, s8, s9);
|
||||
sptr_elt += 16;
|
||||
sd(dptr_elt, 8, 0, s10, s11);
|
||||
dptr_elt += 16;
|
||||
}
|
||||
#if 0
|
||||
for ( ; (dptr_elt < (dptr_elt_last-7)) ; ) {
|
||||
register unsigned int s8 asm("s8");
|
||||
register unsigned int s9 asm("s9");
|
||||
ld(sptr_elt, 0, s8, s9);
|
||||
sd(dptr_elt, 0, 0, s8, s9);
|
||||
sptr_elt += 8;
|
||||
dptr_elt += 8;
|
||||
}
|
||||
#endif
|
||||
for ( ; (dptr_elt < (dptr_elt_last-3)) ; ) {
|
||||
((unsigned int*)dptr_elt)[0] = ((unsigned int*)sptr_elt)[0];
|
||||
dptr_elt += 4;
|
||||
sptr_elt += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* common tail loop */
|
||||
for ( ; dptr_elt < dptr_elt_last ; ) {
|
||||
dptr_elt[0] = sptr_elt[0];
|
||||
dptr_elt ++;
|
||||
sptr_elt ++;
|
||||
}
|
||||
sptr_line += src_stride;
|
||||
dptr_line += dst_stride;
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ GCCPFX=riscv32-buildroot-linux-gnu-
|
||||
GCC=${GCCDIR}/bin/${GCCPFX}gcc
|
||||
OBJCOPY=${GCCDIR}/bin/${GCCPFX}objcopy
|
||||
|
||||
OPT=-Os #-fno-inline
|
||||
OPT=-O3 #-fno-inline
|
||||
ARCH=rv32im_zba_zbb_zbt
|
||||
|
||||
PARAM="-DBASE_FB=${BASE_FB}"
|
||||
|
84
nubus-to-ztex-gateware/ldsdsupport.h
Normal file
84
nubus-to-ztex-gateware/ldsdsupport.h
Normal file
@ -0,0 +1,84 @@
|
||||
#pragma once
|
||||
|
||||
/*
 * Assembler-level symbols regnum_<reg> = <register number>, defined for both
 * the architectural names (x0..x31) and the ABI names (zero, ra, sp, ...).
 * The ld/sd macros in this header paste the register name the compiler
 * prints for an operand after "regnum_" to build raw instruction words.
 */
#define LDSD_DEF_REGNUM(name, num) asm(".set regnum_" #name ", " #num)

/* architectural names */
LDSD_DEF_REGNUM(x0, 0);   LDSD_DEF_REGNUM(x1, 1);   LDSD_DEF_REGNUM(x2, 2);   LDSD_DEF_REGNUM(x3, 3);
LDSD_DEF_REGNUM(x4, 4);   LDSD_DEF_REGNUM(x5, 5);   LDSD_DEF_REGNUM(x6, 6);   LDSD_DEF_REGNUM(x7, 7);
LDSD_DEF_REGNUM(x8, 8);   LDSD_DEF_REGNUM(x9, 9);   LDSD_DEF_REGNUM(x10, 10); LDSD_DEF_REGNUM(x11, 11);
LDSD_DEF_REGNUM(x12, 12); LDSD_DEF_REGNUM(x13, 13); LDSD_DEF_REGNUM(x14, 14); LDSD_DEF_REGNUM(x15, 15);
LDSD_DEF_REGNUM(x16, 16); LDSD_DEF_REGNUM(x17, 17); LDSD_DEF_REGNUM(x18, 18); LDSD_DEF_REGNUM(x19, 19);
LDSD_DEF_REGNUM(x20, 20); LDSD_DEF_REGNUM(x21, 21); LDSD_DEF_REGNUM(x22, 22); LDSD_DEF_REGNUM(x23, 23);
LDSD_DEF_REGNUM(x24, 24); LDSD_DEF_REGNUM(x25, 25); LDSD_DEF_REGNUM(x26, 26); LDSD_DEF_REGNUM(x27, 27);
LDSD_DEF_REGNUM(x28, 28); LDSD_DEF_REGNUM(x29, 29); LDSD_DEF_REGNUM(x30, 30); LDSD_DEF_REGNUM(x31, 31);

/* ABI names */
LDSD_DEF_REGNUM(zero, 0);
LDSD_DEF_REGNUM(ra, 1);
LDSD_DEF_REGNUM(sp, 2);
LDSD_DEF_REGNUM(gp, 3);
LDSD_DEF_REGNUM(tp, 4);
LDSD_DEF_REGNUM(t0, 5);
LDSD_DEF_REGNUM(t1, 6);
LDSD_DEF_REGNUM(t2, 7);
LDSD_DEF_REGNUM(s0, 8);
LDSD_DEF_REGNUM(s1, 9);
LDSD_DEF_REGNUM(a0, 10);
LDSD_DEF_REGNUM(a1, 11);
LDSD_DEF_REGNUM(a2, 12);
LDSD_DEF_REGNUM(a3, 13);
LDSD_DEF_REGNUM(a4, 14);
LDSD_DEF_REGNUM(a5, 15);
LDSD_DEF_REGNUM(a6, 16);
LDSD_DEF_REGNUM(a7, 17);
LDSD_DEF_REGNUM(s2, 18);
LDSD_DEF_REGNUM(s3, 19);
LDSD_DEF_REGNUM(s4, 20);
LDSD_DEF_REGNUM(s5, 21);
LDSD_DEF_REGNUM(s6, 22);
LDSD_DEF_REGNUM(s7, 23);
LDSD_DEF_REGNUM(s8, 24);
LDSD_DEF_REGNUM(s9, 25);
LDSD_DEF_REGNUM(s10, 26);
LDSD_DEF_REGNUM(s11, 27);
LDSD_DEF_REGNUM(t3, 28);
LDSD_DEF_REGNUM(t4, 29);
LDSD_DEF_REGNUM(t5, 30);
LDSD_DEF_REGNUM(t6, 31);

#undef LDSD_DEF_REGNUM
|
||||
|
||||
/*
 * Emit a raw I-type instruction word for the custom 64-bit "dual register"
 * loads: opcode | rd << 7 | func3 << 12 | rs1 << 15 | imm12 << 20.
 *
 *   base  : pointer operand, becomes rs1
 *   imm12 : constant byte offset; only its low 12 bits are encoded
 *   o1    : destination variable, becomes rd
 *   o2    : second destination variable. It is NOT encoded in the word; it
 *           is listed only so the compiler knows it is overwritten.
 *
 * NOTE(review): the hardware presumably writes the register pair (rd, rd+1),
 * so o1/o2 must be pinned to consecutive registers by the caller. The
 * visible callers pin them to s4/s5, s6/s7, s8/s9 and s10/s11 - confirm the
 * even/odd requirement against the VexRiscv plugin.
 *
 * The "memory" clobber is required because the instruction reads memory the
 * compiler cannot see through the operands; without it earlier C stores may
 * still be pending when the load executes.
 *
 * The expansion keeps its own trailing ';' for compatibility with existing
 * call sites.
 */
#define opcode_ld(opcode, func3, base, imm12, o1, o2)			\
	asm volatile(".word ((" #opcode ") | (regnum_%0 << 7) | (regnum_%2 << 15) | (((" #imm12 ") & 0xfff) << 20) | ((" #func3 ") << 12));" \
		     : "=&r" (o1), "=&r" (o2)				\
		     : "r" (base)					\
		     : "memory"						\
		     );

/* ld: opcode 0x03, func3 0x03. ldu: same opcode, func3 0x07. */
#define ld(base, imm12, o1, o2) opcode_ld(0x03, 0x03, base, imm12, o1, o2)
#define ldu(base, imm12, o1, o2) opcode_ld(0x03, 0x07, base, imm12, o1, o2)
|
||||
|
||||
/*
 * Emit a raw S-type instruction word for the custom 64-bit "dual register"
 * store: opcode | imm04 << 7 | func3 << 12 | rs1 << 15 | rs2 << 20 |
 * imm511 << 25.
 *
 *   base   : pointer operand, becomes rs1
 *   imm04  : low 5 bits of the constant byte offset (imm[4:0])
 *   imm511 : high 7 bits of the constant byte offset (imm[11:5])
 *   i1     : first source variable, becomes rs2
 *   i2     : second source variable. It is NOT encoded in the word; it is
 *            listed only so the compiler keeps its value live in a register.
 *
 * NOTE(review): the hardware presumably stores the register pair
 * (rs2, rs2+1), so i1/i2 must be pinned to consecutive registers by the
 * caller, as the visible callers do with s8/s9 and s10/s11 - confirm.
 *
 * The "memory" clobber is required because the instruction writes memory
 * the compiler cannot see through the operands; without it later C loads of
 * the stored location may be served from stale values.
 *
 * The expansion keeps its own trailing ';' for compatibility with existing
 * call sites.
 */
#define opcode_sd(opcode, func3, base, imm04, imm511, i1, i2)		\
	asm volatile(".word ((" #opcode ") | (((" #imm04 ") & 0x1f) << 7) | (regnum_%0 << 15) | (regnum_%1 << 20) | (((" #imm511 ") & 0x7f) << 25) | ((" #func3 ") << 12));" \
		     :							\
		     : "r" (base), "r" (i1), "r" (i2)			\
		     : "memory"						\
		     );

/* sd: opcode 0x23, func3 0x03. */
#define sd(base, imm04, imm511, i1, i2) opcode_sd(0x23, 0x03, base, imm04, imm511, i1, i2)
|
Loading…
Reference in New Issue
Block a user