Stuff for exa/composite support in X11 (SBusFPGA only for now)

This commit is contained in:
Romain Dolbeau 2022-08-20 18:54:30 +02:00
parent d9f964dd47
commit 6e8b0192e2
5 changed files with 8289 additions and 1011 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -31,25 +31,70 @@
typedef unsigned int uint32_t;
typedef volatile unsigned int u_int32_t;
/*
struct control_blitter {
volatile unsigned int fun;
volatile unsigned int done;
volatile unsigned short arg[8];
};
*/
// X11 graphics functions
// Raster-op codes (0x0..0xf); each trailing comment gives the boolean
// combination of source and destination the code stands for.
// The bitblit() router in this file switches on GXcopy and GXxor.
#define GXclear 0x0 /* 0 */
#define GXand 0x1 /* src AND dst */
#define GXandReverse 0x2 /* src AND NOT dst */
#define GXcopy 0x3 /* src */
#define GXandInverted 0x4 /* NOT src AND dst */
#define GXnoop 0x5 /* dst */
#define GXxor 0x6 /* src XOR dst */
#define GXor 0x7 /* src OR dst */
#define GXnor 0x8 /* NOT src AND NOT dst */
#define GXequiv 0x9 /* NOT src XOR dst */
#define GXinvert 0xa /* NOT dst */
#define GXorReverse 0xb /* src OR NOT dst */
#define GXcopyInverted 0xc /* NOT src */
#define GXorInverted 0xd /* NOT src OR dst */
#define GXnand 0xe /* NOT src OR NOT dst */
#define GXset 0xf /* 1 */
#define FUN_BLIT_BIT 0 // hardwired in goblin_accel.py
#define FUN_FILL_BIT 1 // hardwired in goblin_accel.py
#define FUN_PATT_BIT 2 // hardwired in goblin_accel.py
#define FUN_TEST_BIT 3 // hardwired in goblin_accel.py
#define FUN_DONE_BIT 31
// Xrender op
// Every Render code has bit 7 (0x80) set, so a single 8-bit op value can
// carry either a GX code (0x0..0xf) or a Render code without ambiguity.
// The low nibble is the Render operator number (0x0..0xd).
#define PictOpClear (0x80 | 0x0)
#define PictOpSrc (0x80 | 0x1)
#define PictOpDst (0x80 | 0x2)
#define PictOpOver (0x80 | 0x3)
#define PictOpOverReverse (0x80 | 0x4)
#define PictOpIn (0x80 | 0x5)
#define PictOpInReverse (0x80 | 0x6)
#define PictOpOut (0x80 | 0x7)
#define PictOpOutReverse (0x80 | 0x8)
#define PictOpAtop (0x80 | 0x9)
#define PictOpAtopReverse (0x80 | 0xa)
#define PictOpXor (0x80 | 0xb)
#define PictOpAdd (0x80 | 0xc)
#define PictOpSaturate (0x80 | 0xd)
// custom, with 0x40 for 'flip src'
// Same operator numbers as above with bit 6 (0x40) added. These are local
// extensions; of them only PictOpFlipOver is dispatched in the visible code.
#define PictOpFlipClear (0x80 | 0x40 | 0x0)
#define PictOpFlipSrc (0x80 | 0x40 | 0x1)
#define PictOpFlipDst (0x80 | 0x40 | 0x2)
#define PictOpFlipOver (0x80 | 0x40 | 0x3)
#define PictOpFlipOverReverse (0x80 | 0x40 | 0x4)
#define PictOpFlipIn (0x80 | 0x40 | 0x5)
#define PictOpFlipInReverse (0x80 | 0x40 | 0x6)
#define PictOpFlipOut (0x80 | 0x40 | 0x7)
#define PictOpFlipOutReverse (0x80 | 0x40 | 0x8)
#define PictOpFlipAtop (0x80 | 0x40 | 0x9)
#define PictOpFlipAtopReverse (0x80 | 0x40 | 0xa)
#define PictOpFlipXor (0x80 | 0x40 | 0xb)
#define PictOpFlipAdd (0x80 | 0x40 | 0xc)
#define PictOpFlipSaturate (0x80 | 0x40 | 0xd)
#define FUN_BLIT (1<<FUN_BLIT_BIT)
#define FUN_FILL (1<<FUN_FILL_BIT)
#define FUN_PATT (1<<FUN_PATT_BIT)
#define FUN_TEST (1<<FUN_TEST_BIT)
#define FUN_DONE (1<<FUN_DONE_BIT)
/*
 * Command bits of the reg_r5_cmd register. from_reset() reads that register
 * and switches on its low byte to pick the operation to run. The bit
 * positions must match the FUN_*_BIT constants in goblin_accel.py.
 */
#define FUN_BLIT_BIT             0 // hardwired in goblin_accel.py
#define FUN_FILL_BIT             1 // hardwired in goblin_accel.py
#define FUN_PATT_BIT             2 // hardwired in goblin_accel.py
#define FUN_RSMSK8DST32_BIT      3 // hardwired in goblin_accel.py
#define FUN_RSRC32MSK32DST32_BIT 4 // hardwired in goblin_accel.py
#define FUN_RSRC32DST32_BIT      5 // hardwired in goblin_accel.py
#define FUN_DONE_BIT             31

/*
 * Single-bit masks for the positions above. They are built from an unsigned
 * 1 because FUN_DONE uses bit 31: shifting a signed int 1 into the sign bit
 * is undefined behavior, and the register they are compared against is
 * unsigned anyway.
 */
#define FUN_BLIT             (1u << FUN_BLIT_BIT)
#define FUN_FILL             (1u << FUN_FILL_BIT)
#define FUN_PATT             (1u << FUN_PATT_BIT)
#define FUN_RSMSK8DST32      (1u << FUN_RSMSK8DST32_BIT)
#define FUN_RSRC32MSK32DST32 (1u << FUN_RSRC32MSK32DST32_BIT)
#define FUN_RSRC32DST32      (1u << FUN_RSRC32DST32_BIT)
#define FUN_DONE             (1u << FUN_DONE_BIT)
struct goblin_bt_regs {
u_int32_t mode;
@ -75,11 +120,11 @@ struct goblin_accel_regs {
u_int32_t reg_status; // 0
u_int32_t reg_cmd;
u_int32_t reg_r5_cmd;
u_int32_t resv0;
u_int32_t reg_op; // 3; X11 op or (0x80 | Render op)
u_int32_t reg_width; // 4
u_int32_t reg_height;
u_int32_t reg_fgcolor;
u_int32_t resv2;
u_int32_t reg_depth; // 7; 0 is native
u_int32_t reg_bitblt_src_x; // 8
u_int32_t reg_bitblt_src_y;
u_int32_t reg_bitblt_dst_x;
@ -88,6 +133,11 @@ struct goblin_accel_regs {
u_int32_t reg_dst_stride; // 13
u_int32_t reg_src_ptr; // 14
u_int32_t reg_dst_ptr; // 15
u_int32_t reg_bitblt_msk_x; // 16
u_int32_t reg_bitblt_msk_y;
u_int32_t reg_msk_stride; // 18
u_int32_t reg_msk_ptr; // 19
};
//#include "./rvintrin.h"
@ -169,10 +219,48 @@ static void patternrectfill(const unsigned_param_type xd,
const unsigned_param_type dst_stride
);
static void print_hexword(unsigned int v, unsigned int bx, unsigned int by);
static void show_status_on_screen(void);
/*
 * Forward declaration of the Render dispatcher for "solid colour through an
 * 8-bit mask onto a 32-bit destination". from_reset() calls it for the
 * FUN_RSMSK8DST32 command; `op` selects the operator.
 */
static void bitblit_solid_msk8_dst32_fwd_fwd(const unsigned char op,
const unsigned_param_type xm,
const unsigned_param_type ym,
const unsigned_param_type wi,
const unsigned_param_type re,
const unsigned_param_type xd,
const unsigned_param_type yd,
const unsigned int fgcolor,
unsigned char* msk_ptr,
unsigned char* dst_ptr,
const unsigned_param_type msk_stride,
const unsigned_param_type dst_stride);
asm(".global _start\n"
/*
 * Forward declaration of the Render dispatcher for "32-bit source through a
 * 32-bit mask onto a 32-bit destination". from_reset() calls it for the
 * FUN_RSRC32MSK32DST32 command; `op` selects the operator.
 */
static void bitblit_src32_msk32_dst32_fwd_fwd(const unsigned char op,
const unsigned_param_type xs,
const unsigned_param_type ys,
const unsigned_param_type xm,
const unsigned_param_type ym,
const unsigned_param_type wi,
const unsigned_param_type re,
const unsigned_param_type xd,
const unsigned_param_type yd,
unsigned char* src_ptr,
unsigned char* msk_ptr,
unsigned char* dst_ptr,
const unsigned_param_type src_stride,
const unsigned_param_type msk_stride,
const unsigned_param_type dst_stride);
/*
 * Forward declaration of the Render dispatcher for "32-bit source onto a
 * 32-bit destination" (no separate mask). from_reset() calls it for the
 * FUN_RSRC32DST32 command; `op` selects the operator.
 */
static void bitblit_src32_dst32_fwd_fwd(const unsigned char op,
const unsigned_param_type xs,
const unsigned_param_type ys,
const unsigned_param_type wi,
const unsigned_param_type re,
const unsigned_param_type xd,
const unsigned_param_type yd,
unsigned char* src_ptr,
unsigned char* dst_ptr,
const unsigned_param_type src_stride,
const unsigned_param_type dst_stride);
asm(".global _start\n"
"_start:\n"
// ".word 0x0000500F\n" // flush cache ; should not be needed after reset
//"addi sp,zero,66\n" // 0x0042
@ -220,54 +308,80 @@ void from_reset(void) {
struct goblin_accel_regs* fbc = (struct goblin_accel_regs*)BASE_ACCEL_REGS;
struct goblin_bt_regs* fbt = (struct goblin_bt_regs*)BASE_BT_REGS;
unsigned int cmd = fbc->reg_r5_cmd;
unsigned char depth = fbc->reg_depth;
unsigned char op = fbc->reg_op;
uint32_t srcx, wi, dstx;
if (depth == 0) {
#if defined(GOBLIN_NUBUS)
switch ((fbt->mode>>24) & 0xFF) // mode is 8 bits wrong-endian (all fbt is wrong-endian in NuBus version)
switch ((fbt->mode>>24) & 0xFF) // mode is 8 bits wrong-endian (all fbt is wrong-endian in NuBus version)
#elif defined(GOBLIN_SBUS)
switch (fbt->mode & 0xFF)
switch (fbt->mode & 0xFF)
#else
#error "Must define GOBLIN_NUBUS or GOBLIN_SBUS"
#endif
{
{
case mode_32bit:
depth = 32;
break;
case mode_16bit:
depth = 16;
break;
default:
case mode_8bit:
depth = 8;
break;
case mode_4bit:
depth = 4;
break;
case mode_2bit:
depth = 2;
break;
case mode_1bit:
depth = 1;
break;
}
}
switch (depth)
{
case 32:
srcx = fbc->reg_bitblt_src_x << 2;
wi = fbc->reg_width << 2;
dstx = fbc->reg_bitblt_dst_x << 2;
break;
case mode_16bit:
case 16:
srcx = fbc->reg_bitblt_src_x << 1;
wi = fbc->reg_width << 1;
dstx = fbc->reg_bitblt_dst_x << 1;
break;
default:
case mode_8bit:
case 8:
srcx = fbc->reg_bitblt_src_x;
wi = fbc->reg_width;
dstx = fbc->reg_bitblt_dst_x;
break;
case mode_4bit:
case 4:
srcx = fbc->reg_bitblt_src_x >> 1;
wi = fbc->reg_width >> 1;
dstx = fbc->reg_bitblt_dst_x >> 1;
break;
case mode_2bit:
case 2:
srcx = fbc->reg_bitblt_src_x >> 2;
wi = fbc->reg_width >> 2;
dstx = fbc->reg_bitblt_dst_x >> 2;
break;
case mode_1bit:
case 1:
srcx = fbc->reg_bitblt_src_x >> 3;
wi = fbc->reg_width >> 3;
dstx = fbc->reg_bitblt_dst_x >> 3;
break;
}
switch (cmd & 0xF) {
switch (cmd & 0xFF) {
case FUN_BLIT: {
bitblit(srcx, fbc->reg_bitblt_src_y,
wi , fbc->reg_height,
dstx, fbc->reg_bitblt_dst_y,
0xFF, 0x3, // GXcopy
0xFF, op, // FIXME: re-add planemask support for X11 ops
fbc->reg_src_ptr ? (unsigned char*)fbc->reg_src_ptr : (unsigned char*)BASE_FB,
fbc->reg_dst_ptr ? (unsigned char*)fbc->reg_dst_ptr : (unsigned char*)BASE_FB,
fbc->reg_src_stride,
@ -290,6 +404,50 @@ void from_reset(void) {
fbc->reg_dst_ptr ? (unsigned char*)fbc->reg_dst_ptr : (unsigned char*)BASE_FB,
fbc->reg_dst_stride); // assumed to be scaled already
} break;
case FUN_RSMSK8DST32: {
bitblit_solid_msk8_dst32_fwd_fwd(op,
fbc->reg_bitblt_msk_x, // unscaled
fbc->reg_bitblt_msk_y,
fbc->reg_width, // NOT scaled here, we assume depth == 32 here
fbc->reg_height,
dstx, // still scaled for the PTR calculation ...
fbc->reg_bitblt_dst_y,
fbc->reg_fgcolor,
fbc->reg_msk_ptr ? (unsigned char*)fbc->reg_msk_ptr : (unsigned char*)BASE_FB,
fbc->reg_dst_ptr ? (unsigned char*)fbc->reg_dst_ptr : (unsigned char*)BASE_FB,
fbc->reg_msk_stride, // assumed to be scaled already
fbc->reg_dst_stride); // assumed to be scaled already
} break;
case FUN_RSRC32MSK32DST32: {
bitblit_src32_msk32_dst32_fwd_fwd(op,
fbc->reg_bitblt_src_x, // unscaled
fbc->reg_bitblt_src_y,
fbc->reg_bitblt_msk_x, // unscaled
fbc->reg_bitblt_msk_y,
fbc->reg_width, // NOT scaled here, we assume depth == 32 here
fbc->reg_height,
dstx, // still scaled for the PTR calculation ...
fbc->reg_bitblt_dst_y,
fbc->reg_src_ptr ? (unsigned char*)fbc->reg_src_ptr : (unsigned char*)BASE_FB,
fbc->reg_msk_ptr ? (unsigned char*)fbc->reg_msk_ptr : (unsigned char*)BASE_FB,
fbc->reg_dst_ptr ? (unsigned char*)fbc->reg_dst_ptr : (unsigned char*)BASE_FB,
fbc->reg_src_stride, // assumed to be scaled already
fbc->reg_msk_stride, // assumed to be scaled already
fbc->reg_dst_stride); // assumed to be scaled already
} break;
case FUN_RSRC32DST32: {
bitblit_src32_dst32_fwd_fwd(op,
fbc->reg_bitblt_src_x, // unscaled
fbc->reg_bitblt_src_y,
fbc->reg_width, // NOT scaled here, we assume depth == 32 here
fbc->reg_height,
dstx, // still scaled for the PTR calculation ...
fbc->reg_bitblt_dst_y,
fbc->reg_src_ptr ? (unsigned char*)fbc->reg_src_ptr : (unsigned char*)BASE_FB,
fbc->reg_dst_ptr ? (unsigned char*)fbc->reg_dst_ptr : (unsigned char*)BASE_FB,
fbc->reg_src_stride, // assumed to be scaled already
fbc->reg_dst_stride); // assumed to be scaled already
} break;
default:
break;
}
@ -330,6 +488,8 @@ bitblit_proto(_xor);
bitblit_proto(_copy_pm);
bitblit_proto(_xor_pm);
bitblit_proto(_radd);
#define ROUTE_BITBLIT_PM(pm, bb) \
if (pm == 0xFF) bb(xs, ys, wi, re, xd, yd, pm, src_ptr, dst_ptr, src_stride, dst_stride); \
@ -352,47 +512,59 @@ static void bitblit(const unsigned_param_type xs,
if (ys > yd) {
switch(gxop) {
case 0x3: // GXcopy
case GXcopy:
ROUTE_BITBLIT_PM(pm, bitblit_fwd_fwd_copy);
break;
case 0x6: // GXxor
case GXxor:
ROUTE_BITBLIT_PM(pm, bitblit_fwd_fwd_xor);
break;
case PictOpAdd:
bitblit_fwd_fwd_radd(xs, ys, wi, re, xd, yd, pm, src_ptr, dst_ptr, src_stride, dst_stride);
break;
}
} else if (ys < yd) {
switch(gxop) {
case 0x3: // GXcopy
case GXcopy:
ROUTE_BITBLIT_PM(pm, bitblit_bwd_fwd_copy);
break;
case 0x6: // GXxor
case GXxor:
ROUTE_BITBLIT_PM(pm, bitblit_bwd_fwd_xor);
break;
case PictOpAdd:
bitblit_bwd_fwd_radd(xs, ys, wi, re, xd, yd, pm, src_ptr, dst_ptr, src_stride, dst_stride);
break;
}
} else { // ys == yd
if (xs > xd) {
switch(gxop) {
case 0x3: // GXcopy
case GXcopy:
ROUTE_BITBLIT_PM(pm, bitblit_fwd_fwd_copy);
break;
case 0x6: // GXxor
case GXxor:
ROUTE_BITBLIT_PM(pm, bitblit_fwd_fwd_xor);
break;
case PictOpAdd:
bitblit_fwd_fwd_radd(xs, ys, wi, re, xd, yd, pm, src_ptr, dst_ptr, src_stride, dst_stride);
break;
}
} else if (xs < xd) {
switch(gxop) {
case 0x3: // GXcopy
case GXcopy:
ROUTE_BITBLIT_PM(pm, bitblit_fwd_bwd_copy);
break;
case 0x6: // GXxor
case GXxor:
ROUTE_BITBLIT_PM(pm, bitblit_fwd_bwd_xor);
break;
case PictOpAdd:
bitblit_fwd_bwd_radd(xs, ys, wi, re, xd, yd, pm, src_ptr, dst_ptr, src_stride, dst_stride);
break;
}
} else { // xs == xd
switch(gxop) {
case 0x3: // GXcopy
case GXcopy:
/* don't bother */
break;
case 0x6: // GXxor
case GXxor:
rectfill_pm(xd, yd, wi, re, 0, pm, dst_ptr, dst_stride);
break;
}
@ -423,20 +595,19 @@ static void rectfill(const unsigned_param_type xd,
dptr_elt ++;
}
if (wi > 3) {
unsigned int u32color = (unsigned int)u8color | ((unsigned int)u8color)<<8 | ((unsigned int)u8color)<<16 | ((unsigned int)u8color)<<24;
if ((wi>15) && (((unsigned int)dptr_elt&0x7)==0)) {
register unsigned int s8 asm("s8");
register unsigned int s9 asm("s9");
s8 = u32color;
s9 = u32color;
s8 = color;
s9 = color;
for ( ; i < (wi-15) ; i+=16) {
sd(dptr_elt, 0, 0, s8, s9);
sd(dptr_elt, 8, 0, s8, s9);
_custom_sd(dptr_elt, 0, 0, s8, s9);
_custom_sd(dptr_elt, 8, 0, s8, s9);
dptr_elt += 16;
}
}
for ( ; i < (wi-3) ; i+=4) {
*(unsigned int*)dptr_elt = u32color;
*(unsigned int*)dptr_elt = color;
dptr_elt +=4;
}
}
@ -471,10 +642,9 @@ static void rectfill_pm(const unsigned_param_type xd,
dptr_elt ++;
}
if (wi > 3) {
unsigned int u32color = (unsigned int)u8color | ((unsigned int)u8color)<<8 | ((unsigned int)u8color)<<16 | ((unsigned int)u8color)<<24;
unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24;
for ( ; i < (wi-3) ; i+=4) {
*(unsigned int*)dptr_elt = (u32color & u32pm) | (*(unsigned int*)dptr_elt & ~u32pm);
*(unsigned int*)dptr_elt = (color & u32pm) | (*(unsigned int*)dptr_elt & ~u32pm);
dptr_elt +=4;
}
}
@ -509,9 +679,8 @@ static void xorrectfill(const unsigned_param_type xd,
dptr_elt ++;
}
if (wi > 3) {
unsigned int u32color = (unsigned int)u8color | ((unsigned int)u8color)<<8 | ((unsigned int)u8color)<<16 | ((unsigned int)u8color)<<24;
for ( ; i < (wi-3) ; i+=4) {
*(unsigned int*)dptr_elt ^= u32color;
*(unsigned int*)dptr_elt ^= color;
dptr_elt +=4;
}
}
@ -545,10 +714,9 @@ static void xorrectfill_pm(const unsigned_param_type xd,
dptr_elt ++;
}
if (wi > 3) {
unsigned int u32color = (unsigned int)u8color | ((unsigned int)u8color)<<8 | ((unsigned int)u8color)<<16 | ((unsigned int)u8color)<<24;
unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24;
for ( ; i < (wi-3) ; i+=4) {
*(unsigned int*)dptr_elt ^= (u32color & u32pm);
*(unsigned int*)dptr_elt ^= (color & u32pm);
dptr_elt +=4;
}
}
@ -593,7 +761,7 @@ static void invert(const unsigned_param_type xd,
}
}
// X11
// NOT using npm enables the use of 'cmix' in more cases
#define COPY(d,s,pm,npm) (d) = (s)
//#define COPY_PM(d,s,pm,npm) (d) = (((s) & (pm)) | ((d) & (npm)))
@ -601,6 +769,8 @@ static void invert(const unsigned_param_type xd,
#define XOR(d,s,pm,npm) (d) = ((s) ^ (d))
//#define XOR_PM(d,s,pm,npm) (d) = ((((s) ^ (d)) & (pm)) | ((d) & (npm)))
#define XOR_PM(d,s,pm,npm) (d) = ((((s) ^ (d)) & (pm)) | ((d) & (~pm)))
// Xrender
#define RADD(d,s,pm,npm) (d) = ukadd8((d), (s))
#define BLIT_FWD_FWD(NAME, OP) \
static void bitblit_fwd_fwd_##NAME(const unsigned_param_type xs, \
@ -792,7 +962,7 @@ static void invert(const unsigned_param_type xd,
BLIT_FWD_BWD(NAME, OP) \
BLIT_BWD_FWD(NAME, OP) \
#define BLIT_NOTALLDIR(NAME, OP) \
#define BLIT_NOTALLDIR(NAME, OP) \
BLIT_FWD_BWD(NAME, OP) \
BLIT_BWD_FWD(NAME, OP) \
@ -802,6 +972,7 @@ BLIT_ALLDIR(xor, XOR)
BLIT_ALLDIR(copy_pm, COPY_PM)
BLIT_ALLDIR(xor_pm, XOR_PM)
BLIT_ALLDIR(radd, RADD)
static void bitblit_fwd_fwd_copy(const unsigned_param_type xs,
const unsigned_param_type ys,
@ -849,7 +1020,7 @@ static void bitblit_fwd_fwd_copy(const unsigned_param_type xs,
}
sptr_elt = sptr_elt_al + ((unsigned int)sptr_elt & 0x3);
} else if ((xs & 0x7) != (xd & 0x7)) {
/* off-hy-4, can't use 64 ld/sd but still can use 32-bits */
/* off-by-4, can't use 64-bit ld/sd directly (could pipeline the 32-bit data) but can still use 32-bit accesses */
const unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24;
const unsigned char* dptr_elt_end = dptr_elt + wi;
/* align dest & src (they are aligned the same here up to 0x3) */
@ -888,17 +1059,17 @@ static void bitblit_fwd_fwd_copy(const unsigned_param_type xs,
register unsigned int s9 asm("s9");
register unsigned int s10 asm("s10");
register unsigned int s11 asm("s11");
ld(sptr_elt, 0, s4, s5);
ld(sptr_elt, 16, s8, s9);
_custom_ld(sptr_elt, 0, s4, s5);
_custom_ld(sptr_elt, 16, s8, s9);
ld(sptr_elt, 8, s6, s7);
sd(dptr_elt, 0, 0, s4, s5);
sd(dptr_elt, 8, 0, s6, s7);
_custom_ld(sptr_elt, 8, s6, s7);
_custom_sd(dptr_elt, 0, 0, s4, s5);
_custom_sd(dptr_elt, 8, 0, s6, s7);
ld(sptr_elt, 24, s10, s11);
sd(dptr_elt, 16, 0, s8, s9);
_custom_ld(sptr_elt, 24, s10, s11);
_custom_sd(dptr_elt, 16, 0, s8, s9);
sptr_elt += 32;
sd(dptr_elt, 24, 0, s10, s11);
_custom_sd(dptr_elt, 24, 0, s10, s11);
dptr_elt += 32;
}
@ -908,19 +1079,19 @@ static void bitblit_fwd_fwd_copy(const unsigned_param_type xs,
register unsigned int s9 asm("s9");
register unsigned int s10 asm("s10");
register unsigned int s11 asm("s11");
ld(sptr_elt, 0, s8, s9);
ld(sptr_elt, 8, s10, s11);
sd(dptr_elt, 0, 0, s8, s9);
_custom_ld(sptr_elt, 0, s8, s9);
_custom_ld(sptr_elt, 8, s10, s11);
_custom_sd(dptr_elt, 0, 0, s8, s9);
sptr_elt += 16;
sd(dptr_elt, 8, 0, s10, s11);
_custom_sd(dptr_elt, 8, 0, s10, s11);
dptr_elt += 16;
}
#if 0
for ( ; (dptr_elt < (dptr_elt_last-7)) ; ) {
register unsigned int s8 asm("s8");
register unsigned int s9 asm("s9");
ld(sptr_elt, 0, s8, s9);
sd(dptr_elt, 0, 0, s8, s9);
_custom_ld(sptr_elt, 0, s8, s9);
_custom_sd(dptr_elt, 0, 0, s8, s9);
sptr_elt += 8;
dptr_elt += 8;
}
@ -993,3 +1164,340 @@ static void patternrectfill(const unsigned_param_type xd,
pat_ptr_line = pat_ptr + ((j+jo) & pat_ymask) * pat_stride;
}
}
/*
 * bitblit_render_proto(a, b, suf)
 *
 * Emits static prototypes for the three Render blitter families, with the
 * tokens a, b and suf pasted onto each base name:
 *   bitblit_solid_msk8_dst32<a><b><suf>   solid colour, 8-bit mask
 *   bitblit_src32_msk32_dst32<a><b><suf>  32-bit source, 32-bit mask
 *   bitblit_src32_dst32<a><b><suf>        32-bit source, no mask
 * Each invocation declares all three families for the suffix, whether or
 * not a matching definition exists.
 * NOTE(review): in the visible code only some combinations are defined
 * ("over" for all three families, "fover" for src32_dst32) - confirm the
 * remaining prototypes are intentionally left without a body.
 */
#define bitblit_render_proto(a, b, suf) \
static void bitblit_solid_msk8_dst32##a##b##suf(const unsigned_param_type xm, \
const unsigned_param_type ym, \
const unsigned_param_type wi, \
const unsigned_param_type re, \
const unsigned_param_type xd, \
const unsigned_param_type yd, \
const unsigned int fgcolor, \
unsigned char* msk_ptr, \
unsigned char* dst_ptr, \
const unsigned_param_type msk_stride, \
const unsigned_param_type dst_stride); \
static void bitblit_src32_msk32_dst32##a##b##suf(const unsigned_param_type xs, \
const unsigned_param_type ys, \
const unsigned_param_type xm, \
const unsigned_param_type ym, \
const unsigned_param_type wi, \
const unsigned_param_type re, \
const unsigned_param_type xd, \
const unsigned_param_type yd, \
unsigned char* src_ptr, \
unsigned char* msk_ptr, \
unsigned char* dst_ptr, \
const unsigned_param_type src_stride, \
const unsigned_param_type msk_stride, \
const unsigned_param_type dst_stride); \
static void bitblit_src32_dst32##a##b##suf(const unsigned_param_type xs, \
const unsigned_param_type ys, \
const unsigned_param_type wi, \
const unsigned_param_type re, \
const unsigned_param_type xd, \
const unsigned_param_type yd, \
unsigned char* src_ptr, \
unsigned char* dst_ptr, \
const unsigned_param_type src_stride, \
const unsigned_param_type dst_stride);
bitblit_render_proto(_fwd, _fwd, _over)
bitblit_render_proto(_fwd, _fwd, _fover)
bitblit_render_proto(_fwd, _fwd, _outreverse)
/*
 * Render dispatcher: solid colour composited through an 8-bit mask onto a
 * 32-bit destination.
 *
 * op          Render operator; only PictOpOver is handled, any other value
 *             is silently ignored.
 * xm, ym      origin inside the mask surface
 * wi, re      width and number of rows of the rectangle
 * xd, yd      origin inside the destination surface
 * fgcolor     the solid source colour
 * msk_ptr,
 * dst_ptr     base addresses of the mask and destination surfaces
 * msk_stride,
 * dst_stride  line strides of the mask and destination surfaces
 */
static void bitblit_solid_msk8_dst32_fwd_fwd(const unsigned char op,
                                             const unsigned_param_type xm,
                                             const unsigned_param_type ym,
                                             const unsigned_param_type wi,
                                             const unsigned_param_type re,
                                             const unsigned_param_type xd,
                                             const unsigned_param_type yd,
                                             const unsigned int fgcolor,
                                             unsigned char* msk_ptr,
                                             unsigned char* dst_ptr,
                                             const unsigned_param_type msk_stride,
                                             const unsigned_param_type dst_stride) {
    /* PictOpOutReverse is not wired up (its blitter is not instantiated). */
    if (op != PictOpOver) {
        return;
    }
    bitblit_solid_msk8_dst32_fwd_fwd_over(xm, ym, wi, re, xd, yd, fgcolor,
                                          msk_ptr, dst_ptr,
                                          msk_stride, dst_stride);
}
/*
 * Render dispatcher: 32-bit source composited through a 32-bit mask onto a
 * 32-bit destination.
 *
 * op                 Render operator; only PictOpOver is handled, any other
 *                    value is silently ignored.
 * xs, ys / xm, ym /
 * xd, yd             origins inside the source, mask and destination
 * wi, re             width and number of rows of the rectangle
 * src_ptr, msk_ptr,
 * dst_ptr            base addresses of the three surfaces
 * src_stride,
 * msk_stride,
 * dst_stride         line strides of the three surfaces
 */
static void bitblit_src32_msk32_dst32_fwd_fwd(const unsigned char op,
                                              const unsigned_param_type xs,
                                              const unsigned_param_type ys,
                                              const unsigned_param_type xm,
                                              const unsigned_param_type ym,
                                              const unsigned_param_type wi,
                                              const unsigned_param_type re,
                                              const unsigned_param_type xd,
                                              const unsigned_param_type yd,
                                              unsigned char* src_ptr,
                                              unsigned char* msk_ptr,
                                              unsigned char* dst_ptr,
                                              const unsigned_param_type src_stride,
                                              const unsigned_param_type msk_stride,
                                              const unsigned_param_type dst_stride)
{
    if (op == PictOpOver) {
        bitblit_src32_msk32_dst32_fwd_fwd_over(xs, ys, xm, ym, wi, re, xd, yd,
                                               src_ptr, msk_ptr, dst_ptr,
                                               src_stride, msk_stride, dst_stride);
    }
    /* every other operator: nothing to do */
}
/*
 * Render dispatcher: 32-bit source composited onto a 32-bit destination,
 * with no separate mask surface.
 *
 * op          Render operator; PictOpOver and the local PictOpFlipOver
 *             extension are handled, any other value is silently ignored.
 * xs, ys      origin inside the source surface
 * wi, re      width and number of rows of the rectangle
 * xd, yd      origin inside the destination surface
 * src_ptr,
 * dst_ptr     base addresses of the source and destination surfaces
 * src_stride,
 * dst_stride  line strides of the source and destination surfaces
 */
static void bitblit_src32_dst32_fwd_fwd(const unsigned char op,
                                        const unsigned_param_type xs,
                                        const unsigned_param_type ys,
                                        const unsigned_param_type wi,
                                        const unsigned_param_type re,
                                        const unsigned_param_type xd,
                                        const unsigned_param_type yd,
                                        unsigned char* src_ptr,
                                        unsigned char* dst_ptr,
                                        const unsigned_param_type src_stride,
                                        const unsigned_param_type dst_stride)
{
    if (op == PictOpOver) {
        bitblit_src32_dst32_fwd_fwd_over(xs, ys, wi, re, xd, yd,
                                         src_ptr, dst_ptr,
                                         src_stride, dst_stride);
    } else if (op == PictOpFlipOver) {
        bitblit_src32_dst32_fwd_fwd_fover(xs, ys, wi, re, xd, yd,
                                          src_ptr, dst_ptr,
                                          src_stride, dst_stride);
    }
    /* every other operator: nothing to do */
}
// Xrender "over" building blocks.
// Reference formula the macros implement with two chained ufma8v?v calls:
//   d = m*s + d*(0xff ^ m)
// The inner call combines d with the inverted mask and a zero accumulator,
// the outer call adds s combined with the mask on top of that result.
// NOTE(review): the exact per-byte arithmetic (scaling, rounding, which byte
// of the second operand is used by the 'vlv' and 'vhv' variants) is defined
// by the custom instructions, not by this file - confirm against the
// hardware description.
// TROVERl / TROVERl4 use ufma8vlv; TROVERh / TROVERh4 use ufma8vhv.
// The *4 forms process four independent pixels, issuing all four inner
// operations before the four outer ones.
#define TROVERl(d,m,s) (d) = ufma8vlv((s), (m), ufma8vlv((d), (0xffffffff^(m)), 0))
#define TROVERl4(d0,d1,d2,d3,m0,m1,m2,m3,s0,s1,s2,s3) \
(d0) = ufma8vlv((d0), (0xffffffff^(m0)), 0); \
(d1) = ufma8vlv((d1), (0xffffffff^(m1)), 0); \
(d2) = ufma8vlv((d2), (0xffffffff^(m2)), 0); \
(d3) = ufma8vlv((d3), (0xffffffff^(m3)), 0); \
(d0) = ufma8vlv((s0), (m0), (d0)); \
(d1) = ufma8vlv((s1), (m1), (d1)); \
(d2) = ufma8vlv((s2), (m2), (d2)); \
(d3) = ufma8vlv((s3), (m3), (d3))
#define TROVERh(d,m,s) (d) = ufma8vhv((s), (m), ufma8vhv((d), (0xffffffff^(m)), 0))
#define TROVERh4(d0,d1,d2,d3,m0,m1,m2,m3,s0,s1,s2,s3) \
(d0) = ufma8vhv((d0), (0xffffffff^(m0)), 0); \
(d1) = ufma8vhv((d1), (0xffffffff^(m1)), 0); \
(d2) = ufma8vhv((d2), (0xffffffff^(m2)), 0); \
(d3) = ufma8vhv((d3), (0xffffffff^(m3)), 0); \
(d0) = ufma8vhv((s0), (m0), (d0)); \
(d1) = ufma8vhv((s1), (m1), (d1)); \
(d2) = ufma8vhv((s2), (m2), (d2)); \
(d3) = ufma8vhv((s3), (m3), (d3))
/*
 Byte layout before and after (most significant byte first):
3210
0321 // fsr by 8 ; could be rot
 i.e. the lowest byte moves to the top and the other three move down one
 place, which is a rotate right by 8 bits.
*/
/*
 * Returns p with its bytes rearranged as shown above, using one 'fsr'
 * instruction with p supplied as both data inputs and 8 as the amount.
 * NOTE(review): 'fsr' is not in the base RISC-V ISA; this presumably relies
 * on a funnel-shift extension being present in the target core - confirm.
 */
static inline uint32_t pixelswap(const uint32_t p) {
/* uint32_t r = __builtin_bswap32(p); */
/* asm("fsr %0, %1, %2, %3\n" : "=r"(r) : "r"(r), "r"(r), "r"(8)); */
uint32_t r;
asm("fsr %0, %1, %2, %3\n" : "=r"(r) : "r"(p), "r"(p), "r"(8));
return r;
}
// "Flipped source" over: same two-step structure as TROVER*, but the source
// pixel goes through pixelswap() before the outer ufma8vlv, while the mask
// operand is used as passed in.
// NOTE(review): despite the 'h' suffix these macros call ufma8vlv, not
// ufma8vhv - presumably deliberate because of the flipped source layout;
// confirm.
#define TRFOVERh(d,m,s) (d) = (ufma8vlv(pixelswap(s), (m), ufma8vlv((d), (0xffffffff^(m)), 0)))
#define TRFOVERh4(d0,d1,d2,d3,m0,m1,m2,m3,s0,s1,s2,s3) \
(d0) = ufma8vlv((d0), (0xffffffff^(m0)), 0); \
(d1) = ufma8vlv((d1), (0xffffffff^(m1)), 0); \
(d2) = ufma8vlv((d2), (0xffffffff^(m2)), 0); \
(d3) = ufma8vlv((d3), (0xffffffff^(m3)), 0); \
(d0) = (ufma8vlv(pixelswap(s0), (m0), (d0))); \
(d1) = (ufma8vlv(pixelswap(s1), (m1), (d1))); \
(d2) = (ufma8vlv(pixelswap(s2), (m2), (d2))); \
(d3) = (ufma8vlv(pixelswap(s3), (m3), (d3)))
// "Out reverse": only the first step of the over sequence, i.e. the
// destination combined with the inverted mask; the source arguments are
// accepted for signature compatibility but not used.
#define TROUTREVl(d,m,s) (d) = ufma8vlv((d), (0xffffffff^(m)), 0)
#define TROUTREVl4(d0,d1,d2,d3,m0,m1,m2,m3,s0,s1,s2,s3) \
(d0) = ufma8vlv((d0), (0xffffffff^(m0)), 0); \
(d1) = ufma8vlv((d1), (0xffffffff^(m1)), 0); \
(d2) = ufma8vlv((d2), (0xffffffff^(m2)), 0); \
(d3) = ufma8vlv((d3), (0xffffffff^(m3)), 0)
/*
 * BLITSM8D32_FWD_FWD(NAME, TOP, TOP4)
 *
 * Generates bitblit_solid_msk8_dst32_fwd_fwd_<NAME>(): a top-to-bottom,
 * left-to-right loop that combines the constant fgcolor with an 8-bit mask
 * into a 32-bit destination.
 *   TOP(d, m, s)   per-pixel operation, updates d in place
 *   TOP4(...)      the same operation on four pixels at once
 * Inside the generated function:
 *   - xm and xd are added to byte pointers, so they are byte offsets;
 *     msk_stride and dst_stride are likewise added to byte pointers.
 *   - wi counts pixels: the mask pointer advances one byte and the
 *     destination pointer one unsigned int per step.
 *   - the "wi > 3" guard keeps the unsigned expression (wi-3) from wrapping
 *     for narrow rectangles; the trailing loop handles the last 0..3 pixels.
 */
#define BLITSM8D32_FWD_FWD(NAME, TOP, TOP4) \
static void bitblit_solid_msk8_dst32_fwd_fwd_##NAME(const unsigned_param_type xm, \
const unsigned_param_type ym, \
const unsigned_param_type wi, \
const unsigned_param_type re, \
const unsigned_param_type xd, \
const unsigned_param_type yd, \
const unsigned int fgcolor, \
unsigned char* msk_ptr, \
unsigned char* dst_ptr, \
const unsigned_param_type msk_stride, \
const unsigned_param_type dst_stride) { \
unsigned int i, j; \
unsigned char *mptr = (msk_ptr + (ym * msk_stride) + xm); \
unsigned char *dptr = (dst_ptr + (yd * dst_stride) + xd); \
unsigned char *mptr_line = mptr; \
unsigned char *dptr_line = dptr; \
for (j = 0 ; j < re ; j++) { \
unsigned char *mptr_elt = mptr_line; \
unsigned int *dptr_elt = (unsigned int*)dptr_line; \
i = 0; \
if (wi > 3) for ( ; i < (wi-3) ; i+= 4) { \
unsigned char m0 = *(mptr_elt+0); \
unsigned char m1 = *(mptr_elt+1); \
unsigned char m2 = *(mptr_elt+2); \
unsigned char m3 = *(mptr_elt+3); \
unsigned int d0 = *(dptr_elt+0); \
unsigned int d1 = *(dptr_elt+1); \
unsigned int d2 = *(dptr_elt+2); \
unsigned int d3 = *(dptr_elt+3); \
TOP4(d0,d1,d2,d3,m0,m1,m2,m3,fgcolor,fgcolor,fgcolor,fgcolor); \
*(dptr_elt+0) = d0; \
*(dptr_elt+1) = d1; \
*(dptr_elt+2) = d2; \
*(dptr_elt+3) = d3; \
dptr_elt += 4; \
mptr_elt += 4; \
} \
for ( ; i < wi ; i++) { \
TOP(*dptr_elt, *mptr_elt, fgcolor); \
dptr_elt ++; \
mptr_elt ++; \
} \
mptr_line += msk_stride; \
dptr_line += dst_stride; \
} \
}
BLITSM8D32_FWD_FWD(over, TROVERl, TROVERl4)
//BLITSM8D32_FWD_FWD(outreverse, TROUTREVl, TROUTREVl4)
/*
 * BLITS32M32D32_FWD_FWD(NAME, TOP, TOP4)
 *
 * Generates bitblit_src32_msk32_dst32_fwd_fwd_<NAME>(): a top-to-bottom,
 * left-to-right loop that combines a 32-bit source with a 32-bit mask into
 * a 32-bit destination.
 *   TOP(d, m, s)   per-pixel operation, updates d in place
 *   TOP4(...)      the same operation on four pixels at once
 * Inside the generated function:
 *   - xs, xm and xd are added to byte pointers, so they are byte offsets;
 *     the three strides are likewise added to byte pointers.
 *   - wi counts 32-bit pixels (all three element pointers are unsigned int*).
 *   - the "wi > 3" guard keeps the unsigned expression (wi-3) from wrapping
 *     for narrow rectangles; the trailing loop handles the last 0..3 pixels.
 *   - each surface advances by its OWN stride at the end of a row.
 * NOTE(review): the visible caller passes the raw src_x / msk_x registers
 * (commented "unscaled") for xs / xm while xd is pre-scaled to bytes -
 * confirm those registers hold byte offsets (or zero) for 32-bit surfaces.
 */
#define BLITS32M32D32_FWD_FWD(NAME, TOP, TOP4)                                                    \
static void bitblit_src32_msk32_dst32_fwd_fwd_##NAME(const unsigned_param_type xs,                \
                                                     const unsigned_param_type ys,                \
                                                     const unsigned_param_type xm,                \
                                                     const unsigned_param_type ym,                \
                                                     const unsigned_param_type wi,                \
                                                     const unsigned_param_type re,                \
                                                     const unsigned_param_type xd,                \
                                                     const unsigned_param_type yd,                \
                                                     unsigned char* src_ptr,                      \
                                                     unsigned char* msk_ptr,                      \
                                                     unsigned char* dst_ptr,                      \
                                                     const unsigned_param_type src_stride,        \
                                                     const unsigned_param_type msk_stride,        \
                                                     const unsigned_param_type dst_stride) {      \
    unsigned int i, j;                                                                            \
    unsigned char *sptr = (src_ptr + (ys * src_stride) + xs);                                     \
    unsigned char *mptr = (msk_ptr + (ym * msk_stride) + xm);                                     \
    unsigned char *dptr = (dst_ptr + (yd * dst_stride) + xd);                                     \
    unsigned char *sptr_line = sptr;                                                              \
    unsigned char *mptr_line = mptr;                                                              \
    unsigned char *dptr_line = dptr;                                                              \
    for (j = 0 ; j < re ; j++) {                                                                  \
        unsigned int *sptr_elt = (unsigned int*)sptr_line;                                        \
        unsigned int *mptr_elt = (unsigned int*)mptr_line;                                        \
        unsigned int *dptr_elt = (unsigned int*)dptr_line;                                        \
        i = 0;                                                                                    \
        if (wi > 3) for ( ; i < (wi-3) ; i+= 4) {                                                 \
            unsigned int s0 = *(sptr_elt+0);                                                      \
            unsigned int s1 = *(sptr_elt+1);                                                      \
            unsigned int s2 = *(sptr_elt+2);                                                      \
            unsigned int s3 = *(sptr_elt+3);                                                      \
            unsigned int m0 = *(mptr_elt+0);                                                      \
            unsigned int m1 = *(mptr_elt+1);                                                      \
            unsigned int m2 = *(mptr_elt+2);                                                      \
            unsigned int m3 = *(mptr_elt+3);                                                      \
            unsigned int d0 = *(dptr_elt+0);                                                      \
            unsigned int d1 = *(dptr_elt+1);                                                      \
            unsigned int d2 = *(dptr_elt+2);                                                      \
            unsigned int d3 = *(dptr_elt+3);                                                      \
            TOP4(d0,d1,d2,d3,m0,m1,m2,m3,s0,s1,s2,s3);                                            \
            *(dptr_elt+0) = d0;                                                                   \
            *(dptr_elt+1) = d1;                                                                   \
            *(dptr_elt+2) = d2;                                                                   \
            *(dptr_elt+3) = d3;                                                                   \
            sptr_elt += 4;                                                                        \
            dptr_elt += 4;                                                                        \
            mptr_elt += 4;                                                                        \
        }                                                                                         \
        for ( ; i < wi ; i++) {                                                                   \
            TOP(*dptr_elt, *mptr_elt, *sptr_elt);                                                 \
            sptr_elt ++;                                                                          \
            dptr_elt ++;                                                                          \
            mptr_elt ++;                                                                          \
        }                                                                                         \
        sptr_line += src_stride; /* source rows step by the source stride */                      \
        mptr_line += msk_stride;                                                                  \
        dptr_line += dst_stride;                                                                  \
    }                                                                                             \
}
BLITS32M32D32_FWD_FWD(over, TROVERh, TROVERh4)
/*
 * BLITS32D32_FWD_FWD(NAME, TOP, TOP4)
 *
 * Generates bitblit_src32_dst32_fwd_fwd_<NAME>(): a top-to-bottom,
 * left-to-right loop that combines a 32-bit source into a 32-bit
 * destination. There is no separate mask surface: the source pixel is
 * passed to TOP/TOP4 as both the mask and the source operand.
 *   TOP(d, m, s)   per-pixel operation, updates d in place
 *   TOP4(...)      the same operation on four pixels at once
 * Inside the generated function:
 *   - xs and xd are added to byte pointers, so they are byte offsets;
 *     both strides are likewise added to byte pointers.
 *   - wi counts 32-bit pixels (both element pointers are unsigned int*).
 *   - the "wi > 3" guard keeps the unsigned expression (wi-3) from wrapping
 *     for narrow rectangles; the trailing loop handles the last 0..3 pixels.
 *   - each surface advances by its OWN stride at the end of a row.
 * NOTE(review): the visible caller passes the raw src_x register (commented
 * "unscaled") for xs while xd is pre-scaled to bytes - confirm that register
 * holds a byte offset (or zero) for 32-bit surfaces.
 */
#define BLITS32D32_FWD_FWD(NAME, TOP, TOP4)                                                 \
static void bitblit_src32_dst32_fwd_fwd_##NAME(const unsigned_param_type xs,                \
                                               const unsigned_param_type ys,                \
                                               const unsigned_param_type wi,                \
                                               const unsigned_param_type re,                \
                                               const unsigned_param_type xd,                \
                                               const unsigned_param_type yd,                \
                                               unsigned char* src_ptr,                      \
                                               unsigned char* dst_ptr,                      \
                                               const unsigned_param_type src_stride,        \
                                               const unsigned_param_type dst_stride) {      \
    unsigned int i, j;                                                                      \
    unsigned char *sptr = (src_ptr + (ys * src_stride) + xs);                               \
    unsigned char *dptr = (dst_ptr + (yd * dst_stride) + xd);                               \
    unsigned char *sptr_line = sptr;                                                        \
    unsigned char *dptr_line = dptr;                                                        \
    for (j = 0 ; j < re ; j++) {                                                            \
        unsigned int *sptr_elt = (unsigned int*)sptr_line;                                  \
        unsigned int *dptr_elt = (unsigned int*)dptr_line;                                  \
        i = 0;                                                                              \
        if (wi > 3) for ( ; i < (wi-3) ; i+= 4) {                                           \
            unsigned int s0 = *(sptr_elt+0);                                                \
            unsigned int s1 = *(sptr_elt+1);                                                \
            unsigned int s2 = *(sptr_elt+2);                                                \
            unsigned int s3 = *(sptr_elt+3);                                                \
            unsigned int d0 = *(dptr_elt+0);                                                \
            unsigned int d1 = *(dptr_elt+1);                                                \
            unsigned int d2 = *(dptr_elt+2);                                                \
            unsigned int d3 = *(dptr_elt+3);                                                \
            TOP4(d0,d1,d2,d3,s0,s1,s2,s3,s0,s1,s2,s3);                                      \
            *(dptr_elt+0) = d0;                                                             \
            *(dptr_elt+1) = d1;                                                             \
            *(dptr_elt+2) = d2;                                                             \
            *(dptr_elt+3) = d3;                                                             \
            sptr_elt += 4;                                                                  \
            dptr_elt += 4;                                                                  \
        }                                                                                   \
        for ( ; i < wi ; i++) {                                                             \
            TOP(*dptr_elt, *sptr_elt, *sptr_elt);                                           \
            sptr_elt ++;                                                                    \
            dptr_elt ++;                                                                    \
        }                                                                                   \
        sptr_line += src_stride; /* source rows step by the source stride */                \
        dptr_line += dst_stride;                                                            \
    }                                                                                       \
}
BLITS32D32_FWD_FWD(over, TROVERh, TROVERh4)
BLITS32D32_FWD_FWD(fover, TRFOVERh, TRFOVERh4)

View File

@ -20,11 +20,11 @@ class GoblinAccel(Module): # AutoCSR ?
reg_status = Signal(32) # 0
reg_cmd = Signal(32) # 1
reg_r5_cmd = Signal(32) # 2, to communicate with Vex
# 3 resv0
reg_op = Signal(8) # 3, X11 op or (0x80 | Render op)
reg_width = Signal(COORD_BITS) # 4
reg_height = Signal(COORD_BITS) # 5
reg_fgcolor = Signal(32) # 6
# 7 resv2
reg_depth = Signal(8) # 7, depth (0 is native)
reg_bitblt_src_x = Signal(COORD_BITS) # 8
reg_bitblt_src_y = Signal(COORD_BITS) # 9
reg_bitblt_dst_x = Signal(COORD_BITS) # 10
@ -34,17 +34,26 @@ class GoblinAccel(Module): # AutoCSR ?
reg_src_ptr = Signal(32) # 14
reg_dst_ptr = Signal(32) # 15
reg_bitblt_msk_x = Signal(COORD_BITS) # 16
reg_bitblt_msk_y = Signal(COORD_BITS) # 17
reg_msk_stride = Signal(COORD_BITS) # 18
reg_msk_ptr = Signal(32) # 19
# do-some-work flags
do_blit = Signal()
do_fill = Signal()
do_patt = Signal()
do_test = Signal()
do_rsmsk8dst32 = Signal()
do_rsrc32msk32dst32 = Signal()
do_rsrc32dst32 = Signal()
# cmd register reg_cmd
DO_BLIT_BIT = 0
DO_FILL_BIT = 1
DO_PATT_BIT = 2
DO_TEST_BIT = 3
DO_RSMSK8DST32_BIT = 3
DO_RSRC32MSK32DST32_BIT = 4
DO_RSRC32DST32_BIT = 5
# global status register reg_status
WORK_IN_PROGRESS_BIT = 0
@ -83,14 +92,16 @@ class GoblinAccel(Module): # AutoCSR ?
NextValue(do_blit, bus_dat_w_endian[DO_BLIT_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]),
NextValue(do_fill, bus_dat_w_endian[DO_FILL_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]),
NextValue(do_patt, bus_dat_w_endian[DO_PATT_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]),
NextValue(do_test, bus_dat_w_endian[DO_TEST_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]),
NextValue(do_rsmsk8dst32, bus_dat_w_endian[DO_RSMSK8DST32_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]),
NextValue(do_rsrc32msk32dst32, bus_dat_w_endian[DO_RSRC32MSK32DST32_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]),
NextValue(do_rsrc32dst32, bus_dat_w_endian[DO_RSRC32DST32_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]),
],
2: [ NextValue(reg_r5_cmd, bus_dat_w_endian) ],
# 3
3: [ NextValue(reg_op, bus_dat_w_endian) ],
4: [ NextValue(reg_width, bus_dat_w_endian) ],
5: [ NextValue(reg_height, bus_dat_w_endian) ],
6: [ NextValue(reg_fgcolor, bus_dat_w_endian) ],
# 7
7: [ NextValue(reg_depth, bus_dat_w_endian) ],
8: [ NextValue(reg_bitblt_src_x, bus_dat_w_endian) ],
9: [ NextValue(reg_bitblt_src_y, bus_dat_w_endian) ],
10: [ NextValue(reg_bitblt_dst_x, bus_dat_w_endian) ],
@ -99,6 +110,11 @@ class GoblinAccel(Module): # AutoCSR ?
13: [ NextValue(reg_dst_stride, bus_dat_w_endian) ],
14: [ NextValue(reg_src_ptr, bus_dat_w_endian) ],
15: [ NextValue(reg_dst_ptr, bus_dat_w_endian) ],
16: [ NextValue(reg_bitblt_msk_x, bus_dat_w_endian) ],
17: [ NextValue(reg_bitblt_msk_y, bus_dat_w_endian) ],
18: [ NextValue(reg_msk_stride, bus_dat_w_endian) ],
19: [ NextValue(reg_msk_ptr, bus_dat_w_endian) ],
}),
NextValue(bus.ack, 1),
).Elif(bus.cyc & bus.stb & ~bus.we & ~bus.ack, #read
@ -107,11 +123,11 @@ class GoblinAccel(Module): # AutoCSR ?
0: [ NextValue(bus_dat_r_endian, reg_status) ],
1: [ NextValue(bus_dat_r_endian, reg_cmd) ],
2: [ NextValue(bus_dat_r_endian, reg_r5_cmd) ],
# 3
3: [ NextValue(bus_dat_r_endian, reg_op) ],
4: [ NextValue(bus_dat_r_endian, reg_width) ],
5: [ NextValue(bus_dat_r_endian, reg_height) ],
6: [ NextValue(bus_dat_r_endian, reg_fgcolor) ],
# 7
7: [ NextValue(bus_dat_r_endian, reg_depth) ],
8: [ NextValue(bus_dat_r_endian, reg_bitblt_src_x) ],
9: [ NextValue(bus_dat_r_endian, reg_bitblt_src_y) ],
10: [ NextValue(bus_dat_r_endian, reg_bitblt_dst_x) ],
@ -120,6 +136,11 @@ class GoblinAccel(Module): # AutoCSR ?
13: [ NextValue(bus_dat_r_endian, reg_dst_stride) ],
14: [ NextValue(bus_dat_r_endian, reg_src_ptr) ],
15: [ NextValue(bus_dat_r_endian, reg_dst_ptr) ],
16: [ NextValue(bus_dat_r_endian, reg_bitblt_msk_x) ],
17: [ NextValue(bus_dat_r_endian, reg_bitblt_msk_y) ],
18: [ NextValue(bus_dat_r_endian, reg_msk_stride) ],
19: [ NextValue(bus_dat_r_endian, reg_msk_ptr) ],
}),
NextValue(bus.ack, 1),
).Else(
@ -132,7 +153,9 @@ class GoblinAccel(Module): # AutoCSR ?
FUN_BLIT_BIT = 0
FUN_FILL_BIT = 1
FUN_PATT_BIT = 2
FUN_TEST_BIT = 3
FUN_RSMSK8DST32_BIT = 3
FUN_RSRC32MSK32DST32_BIT = 4
FUN_RSRC32DST32_BIT = 5
# to hold the Vex in reset
self.local_reset = local_reset = Signal(reset = 1)
@ -160,9 +183,21 @@ class GoblinAccel(Module): # AutoCSR ?
reg_status[WORK_IN_PROGRESS_BIT].eq(1),
local_reset.eq(0),
#timeout.eq(timeout_rst),
).Elif(do_test & ~reg_status[WORK_IN_PROGRESS_BIT],
do_test.eq(0),
reg_r5_cmd[FUN_TEST_BIT].eq(1),
).Elif(do_rsmsk8dst32 & ~reg_status[WORK_IN_PROGRESS_BIT],
do_rsmsk8dst32.eq(0),
reg_r5_cmd[FUN_RSMSK8DST32_BIT].eq(1),
reg_status[WORK_IN_PROGRESS_BIT].eq(1),
local_reset.eq(0),
#timeout.eq(timeout_rst),
).Elif(do_rsrc32msk32dst32 & ~reg_status[WORK_IN_PROGRESS_BIT],
do_rsrc32msk32dst32.eq(0),
reg_r5_cmd[FUN_RSRC32MSK32DST32_BIT].eq(1),
reg_status[WORK_IN_PROGRESS_BIT].eq(1),
local_reset.eq(0),
#timeout.eq(timeout_rst),
).Elif(do_rsrc32dst32 & ~reg_status[WORK_IN_PROGRESS_BIT],
do_rsrc32dst32.eq(0),
reg_r5_cmd[FUN_RSRC32DST32_BIT].eq(1),
reg_status[WORK_IN_PROGRESS_BIT].eq(1),
local_reset.eq(0),
#timeout.eq(timeout_rst),

View File

@ -72,8 +72,8 @@ asm(".set regnum_t6 , 31");
: "r" (base) \
); \
#define ld(base, imm12, o1, o2) opcode_ld(0x03, 0x03, base, imm12, o1, o2)
#define ldu(base, imm12, o1, o2) opcode_ld(0x03, 0x07, base, imm12, o1, o2)
#define _custom_ld(base, imm12, o1, o2) opcode_ld(0x03, 0x03, base, imm12, o1, o2)
#define _custom_ldu(base, imm12, o1, o2) opcode_ld(0x03, 0x07, base, imm12, o1, o2)
#define opcode_sd(opcode, func3, base, imm04, imm511, i1, i2) \
asm volatile(".word ((" #opcode ") | (" #imm04 " << 7) | (regnum_%0 << 15) | (regnum_%1 << 20) | (" #imm511 " << 25) | ((" #func3 ") << 12));" \
@ -81,4 +81,43 @@ asm(".set regnum_t6 , 31");
: "r" (base), "r" (i1), "r" (i2) \
); \
#define sd(base, imm04, imm511, i1, i2) opcode_sd(0x23, 0x03, base, imm04, imm511, i1, i2)
#define _custom_sd(base, imm04, imm511, i1, i2) opcode_sd(0x23, 0x03, base, imm04, imm511, i1, i2)
/*
 * opcode_p(opcode, func3, func7, rd, rs1, rs2)
 *
 * Emits one hand-encoded 32-bit instruction as a raw .word: opcode in the
 * low bits, rd at bit 7, func3 at bit 12, rs1 at bit 15, rs2 at bit 20 and
 * func7 at bit 25. The register fields are filled in through the
 * regnum_<register name> assembler symbols set up earlier in this header.
 * rd is a write-only output ("=r").
 */
#define opcode_p(opcode, func3, func7, rd, rs1, rs2) \
asm volatile(".word ((" #opcode ") | (regnum_%0 << 7) | (regnum_%1 << 15) | (regnum_%2 << 20) | ((" #func3 ") << 12) | ((" #func7 ") << 25));" \
: "=r" (rd) \
: "r" (rs1), "r" (rs2) \
);
/*
 * opcode_p_ter: same encoding as opcode_p, but rd is a read-write operand
 * ("+r"), for instructions that also consume the previous value of rd as a
 * third input (e.g. an accumulator).
 */
#define opcode_p_ter(opcode, func3, func7, rd, rs1, rs2) \
asm volatile(".word ((" #opcode ") | (regnum_%0 << 7) | (regnum_%1 << 15) | (regnum_%2 << 20) | ((" #func3 ") << 12) | ((" #func7 ") << 25));" \
: "+r" (rd) \
: "r" (rs1), "r" (rs2) \
);
/*
 * Instruction encodings, all on major opcode 0x77 with func3 0 and differing
 * only in func7. The two ufma8v?v forms use opcode_p_ter because rd is both
 * read and written.
 * NOTE(review): the names suggest per-byte unsigned saturating add/subtract
 * and a per-byte fused multiply-add; the func7 values 0x64/0x66 are
 * presumably custom to this core - confirm against the CPU configuration.
 */
#define _ukadd8(rd, rs1, rs2) opcode_p(0x00000077, 0x00, 0x1c, rd, rs1, rs2)
#define _uksub8(rd, rs1, rs2) opcode_p(0x00000077, 0x00, 0x1d, rd, rs1, rs2)
#define _ufma8vhv(rd, rs1, rs2) opcode_p_ter(0x00000077, 0x00, 0x64, rd, rs1, rs2)
#define _ufma8vlv(rd, rs1, rs2) opcode_p_ter(0x00000077, 0x00, 0x66, rd, rs1, rs2)
/*
 * Function-style wrapper around the _ukadd8 instruction: returns the value
 * the instruction produces for operands a and b.
 * NOTE(review): presumably a per-byte unsigned saturating add - confirm.
 */
static inline unsigned int ukadd8(const unsigned int a, const unsigned int b) {
unsigned int r;
_ukadd8(r, a, b);
return r;
}
/*
 * Function-style wrapper around the _uksub8 instruction: returns the value
 * the instruction produces for operands a and b.
 * NOTE(review): presumably a per-byte unsigned saturating subtract - confirm.
 */
static inline unsigned int uksub8(const unsigned int a, const unsigned int b) {
unsigned int r;
_uksub8(r, a, b);
return r;
}
/*
 * Function-style wrapper around the _ufma8vhv instruction. The result
 * register is preloaded with c because the instruction reads rd as well as
 * writing it; a and b are the two source operands.
 * NOTE(review): presumably computes a per-byte a*b + c using a byte of b
 * selected by the 'vhv' variant - confirm against the hardware description.
 */
static inline unsigned int ufma8vhv(const unsigned int a, const unsigned int b, const unsigned int c) {
unsigned int r = c;
_ufma8vhv(r, a, b);
return r;
}
/*
 * Function-style wrapper around the _ufma8vlv instruction. The result
 * register is preloaded with c because the instruction reads rd as well as
 * writing it; a and b are the two source operands.
 * NOTE(review): presumably computes a per-byte a*b + c using a byte of b
 * selected by the 'vlv' variant - confirm against the hardware description.
 */
static inline unsigned int ufma8vlv(const unsigned int a, const unsigned int b, const unsigned int c) {
unsigned int r = c;
_ufma8vlv(r, a, b);
return r;
}