From d2ad149e56691c3b784f87ae188c4e509a431c6b Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Sun, 19 Dec 2021 16:43:18 -0500 Subject: [PATCH 1/2] Fill mode always runs right to left. --- Machines/Amiga/Blitter.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/Machines/Amiga/Blitter.cpp b/Machines/Amiga/Blitter.cpp index 26d96130c..1303034c7 100644 --- a/Machines/Amiga/Blitter.cpp +++ b/Machines/Amiga/Blitter.cpp @@ -274,7 +274,7 @@ bool Blitter::advance_dma() { // does-it-pass-the-tests? version. if(exclusive_fill_ || inclusive_fill_) { uint16_t fill_output = 0; - uint16_t bit = one_dot_ ? 0x0001 : 0x8000; + uint16_t bit = 0x0001; uint16_t flag = fill_carry ? bit : 0x0000; while(bit) { uint16_t pre_toggle = output & bit, post_toggle = pre_toggle; @@ -290,13 +290,8 @@ bool Blitter::advance_dma() { flag ^= post_toggle; fill_carry = flag; - if(one_dot_) { - bit <<= 1; - flag <<= 1; - } else { - bit >>= 1; - flag >>= 1; - } + bit <<= 1; + flag <<= 1; } output = fill_output; From d0e3024beca0909ce9fd48e6636940550631525e Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Sun, 19 Dec 2021 17:16:46 -0500 Subject: [PATCH 2/2] Switch to nibble-oriented lookup tables for fill mode. --- Machines/Amiga/Blitter.cpp | 121 +++++++++++++++++++++++++++++++------ 1 file changed, 101 insertions(+), 20 deletions(-) diff --git a/Machines/Amiga/Blitter.cpp b/Machines/Amiga/Blitter.cpp index 1303034c7..a179d0527 100644 --- a/Machines/Amiga/Blitter.cpp +++ b/Machines/Amiga/Blitter.cpp @@ -21,6 +21,98 @@ using namespace Amiga; +namespace { + +/// @returns Either the final carry flag or the output nibble when using fill mode given that it either @c is_exclusive fill mode, or isn't; +/// and the specified initial @c carry and input @c nibble. +template constexpr uint32_t fill_nibble(bool is_exclusive, uint8_t carry, uint8_t nibble) { + uint8_t fill_output = 0; + uint8_t bit = 0x01; + while(bit < 0x10) { + auto pre_toggle = nibble & bit, post_toggle = pre_toggle; + if(!is_exclusive) { + pre_toggle &= ~carry; // Accept bits that would transition to set immediately. + post_toggle &= carry; // Accept bits that would transition to clear after the fact. + } else { + post_toggle = 0; // Just do the pre-toggle. + } + + carry ^= pre_toggle; + fill_output |= carry; + carry ^= post_toggle; + + bit <<= 1; + carry <<= 1; + } + + if constexpr (wants_carry) { + return carry >> 4; + } else { + return fill_output; + } +} + +// Lookup key for these tables is: +// +// b0–b3: input nibble +// b4: carry +// b5: is_exclusive +// +// i.e. it's in the range [0, 63]. +// +// Tables below are indexed such that the higher-order bits select a table entry, lower-order bits select +// a bit or nibble from within the indexed item. + +constexpr uint32_t fill_carries[] = { + (fill_nibble(false, 0, 0x0) << 0x0) | (fill_nibble(false, 0, 0x1) << 0x1) | (fill_nibble(false, 0, 0x2) << 0x2) | (fill_nibble(false, 0, 0x3) << 0x3) | + (fill_nibble(false, 0, 0x4) << 0x4) | (fill_nibble(false, 0, 0x5) << 0x5) | (fill_nibble(false, 0, 0x6) << 0x6) | (fill_nibble(false, 0, 0x7) << 0x7) | + (fill_nibble(false, 0, 0x8) << 0x8) | (fill_nibble(false, 0, 0x9) << 0x9) | (fill_nibble(false, 0, 0xa) << 0xa) | (fill_nibble(false, 0, 0xb) << 0xb) | + (fill_nibble(false, 0, 0xc) << 0xc) | (fill_nibble(false, 0, 0xd) << 0xd) | (fill_nibble(false, 0, 0xe) << 0xe) | (fill_nibble(false, 0, 0xf) << 0xf) | + + (fill_nibble(false, 1, 0x0) << 0x10) | (fill_nibble(false, 1, 0x1) << 0x11) | (fill_nibble(false, 1, 0x2) << 0x12) | (fill_nibble(false, 1, 0x3) << 0x13) | + (fill_nibble(false, 1, 0x4) << 0x14) | (fill_nibble(false, 1, 0x5) << 0x15) | (fill_nibble(false, 1, 0x6) << 0x16) | (fill_nibble(false, 1, 0x7) << 0x17) | + (fill_nibble(false, 1, 0x8) << 0x18) | (fill_nibble(false, 1, 0x9) << 0x19) | (fill_nibble(false, 1, 0xa) << 0x1a) | (fill_nibble(false, 1, 0xb) << 0x1b) | + (fill_nibble(false, 1, 0xc) << 0x1c) | (fill_nibble(false, 1, 0xd) << 0x1d) | (fill_nibble(false, 1, 0xe) << 0x1e) | (fill_nibble(false, 1, 0xf) << 0x1f), + + (fill_nibble(true, 0, 0x0) << 0x0) | (fill_nibble(true, 0, 0x1) << 0x1) | (fill_nibble(true, 0, 0x2) << 0x2) | (fill_nibble(true, 0, 0x3) << 0x3) | + (fill_nibble(true, 0, 0x4) << 0x4) | (fill_nibble(true, 0, 0x5) << 0x5) | (fill_nibble(true, 0, 0x6) << 0x6) | (fill_nibble(true, 0, 0x7) << 0x7) | + (fill_nibble(true, 0, 0x8) << 0x8) | (fill_nibble(true, 0, 0x9) << 0x9) | (fill_nibble(true, 0, 0xa) << 0xa) | (fill_nibble(true, 0, 0xb) << 0xb) | + (fill_nibble(true, 0, 0xc) << 0xc) | (fill_nibble(true, 0, 0xd) << 0xd) | (fill_nibble(true, 0, 0xe) << 0xe) | (fill_nibble(true, 0, 0xf) << 0xf) | + + (fill_nibble(true, 1, 0x0) << 0x10) | (fill_nibble(true, 1, 0x1) << 0x11) | (fill_nibble(true, 1, 0x2) << 0x12) | (fill_nibble(true, 1, 0x3) << 0x13) | + (fill_nibble(true, 1, 0x4) << 0x14) | (fill_nibble(true, 1, 0x5) << 0x15) | (fill_nibble(true, 1, 0x6) << 0x16) | (fill_nibble(true, 1, 0x7) << 0x17) | + (fill_nibble(true, 1, 0x8) << 0x18) | (fill_nibble(true, 1, 0x9) << 0x19) | (fill_nibble(true, 1, 0xa) << 0x1a) | (fill_nibble(true, 1, 0xb) << 0x1b) | + (fill_nibble(true, 1, 0xc) << 0x1c) | (fill_nibble(true, 1, 0xd) << 0x1d) | (fill_nibble(true, 1, 0xe) << 0x1e) | (fill_nibble(true, 1, 0xf) << 0x1f), +}; + +constexpr uint32_t fill_values[] = { + (fill_nibble(false, 0, 0x0) << 0) | (fill_nibble(false, 0, 0x1) << 4) | (fill_nibble(false, 0, 0x2) << 8) | (fill_nibble(false, 0, 0x3) << 12) | + (fill_nibble(false, 0, 0x4) << 16) | (fill_nibble(false, 0, 0x5) << 20) | (fill_nibble(false, 0, 0x6) << 24) | (fill_nibble(false, 0, 0x7) << 28), + + (fill_nibble(false, 0, 0x8) << 0) | (fill_nibble(false, 0, 0x9) << 4) | (fill_nibble(false, 0, 0xa) << 8) | (fill_nibble(false, 0, 0xb) << 12) | + (fill_nibble(false, 0, 0xc) << 16) | (fill_nibble(false, 0, 0xd) << 20) | (fill_nibble(false, 0, 0xe) << 24) | (fill_nibble(false, 0, 0xf) << 28), + + (fill_nibble(false, 1, 0x0) << 0) | (fill_nibble(false, 1, 0x1) << 4) | (fill_nibble(false, 1, 0x2) << 8) | (fill_nibble(false, 1, 0x3) << 12) | + (fill_nibble(false, 1, 0x4) << 16) | (fill_nibble(false, 1, 0x5) << 20) | (fill_nibble(false, 1, 0x6) << 24) | (fill_nibble(false, 1, 0x7) << 28), + + (fill_nibble(false, 1, 0x8) << 0) | (fill_nibble(false, 1, 0x9) << 4) | (fill_nibble(false, 1, 0xa) << 8) | (fill_nibble(false, 1, 0xb) << 12) | + (fill_nibble(false, 1, 0xc) << 16) | (fill_nibble(false, 1, 0xd) << 20) | (fill_nibble(false, 1, 0xe) << 24) | (fill_nibble(false, 1, 0xf) << 28), + + (fill_nibble(true, 0, 0x0) << 0) | (fill_nibble(true, 0, 0x1) << 4) | (fill_nibble(true, 0, 0x2) << 8) | (fill_nibble(true, 0, 0x3) << 12) | + (fill_nibble(true, 0, 0x4) << 16) | (fill_nibble(true, 0, 0x5) << 20) | (fill_nibble(true, 0, 0x6) << 24) | (fill_nibble(true, 0, 0x7) << 28), + + (fill_nibble(true, 0, 0x8) << 0) | (fill_nibble(true, 0, 0x9) << 4) | (fill_nibble(true, 0, 0xa) << 8) | (fill_nibble(true, 0, 0xb) << 12) | + (fill_nibble(true, 0, 0xc) << 16) | (fill_nibble(true, 0, 0xd) << 20) | (fill_nibble(true, 0, 0xe) << 24) | (fill_nibble(true, 0, 0xf) << 28), + + (fill_nibble(true, 1, 0x0) << 0) | (fill_nibble(true, 1, 0x1) << 4) | (fill_nibble(true, 1, 0x2) << 8) | (fill_nibble(true, 1, 0x3) << 12) | + (fill_nibble(true, 1, 0x4) << 16) | (fill_nibble(true, 1, 0x5) << 20) | (fill_nibble(true, 1, 0x6) << 24) | (fill_nibble(true, 1, 0x7) << 28), + + (fill_nibble(true, 1, 0x8) << 0) | (fill_nibble(true, 1, 0x9) << 4) | (fill_nibble(true, 1, 0xa) << 8) | (fill_nibble(true, 1, 0xb) << 12) | + (fill_nibble(true, 1, 0xc) << 16) | (fill_nibble(true, 1, 0xd) << 20) | (fill_nibble(true, 1, 0xe) << 24) | (fill_nibble(true, 1, 0xf) << 28), +}; + +} + void Blitter::set_control(int index, uint16_t value) { if(index) { line_mode_ = (value & 0x0001); @@ -270,31 +362,20 @@ bool Blitter::advance_dma() { c_data_, minterms_); - // TODO: don't be so dense as below. This is the initial - // does-it-pass-the-tests? version. if(exclusive_fill_ || inclusive_fill_) { + // Use the fill tables nibble-by-nibble to figure out the filled word. uint16_t fill_output = 0; - uint16_t bit = 0x0001; - uint16_t flag = fill_carry ? bit : 0x0000; - while(bit) { - uint16_t pre_toggle = output & bit, post_toggle = pre_toggle; - if(inclusive_fill_) { - pre_toggle &= ~flag; // Accept bits that would transition to set immediately. - post_toggle &= flag; // Accept bits that would transition to clear after the fact. - } else { - post_toggle = 0; // Just do the pre toggle. - } - - flag ^= pre_toggle; - fill_output |= flag; - flag ^= post_toggle; - - fill_carry = flag; - bit <<= 1; - flag <<= 1; + int ongoing_carry = fill_carry; + const int type_mask = exclusive_fill_ ? (1 << 5) : 0; + for(int c = 0; c < 16; c += 4) { + const int total_index = (output & 0xf) | (ongoing_carry << 4) | type_mask; + fill_output |= ((fill_values[total_index >> 3] >> ((total_index & 7) * 4)) & 0xf) << c; + ongoing_carry = (fill_carries[total_index >> 5] >> (total_index & 31)) & 1; + output >>= 4; } output = fill_output; + fill_carry = ongoing_carry; } not_zero_flag_ |= output;