1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-11-04 15:05:36 +00:00

Merge pull request #1002 from TomHarte/FastBlitterFills

Switch to a table-based implementation of fill mode.
This commit is contained in:
Thomas Harte 2021-12-19 17:35:27 -05:00 committed by GitHub
commit bdf0a1941c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -21,6 +21,98 @@
using namespace Amiga;
namespace {
/// @returns Either the final carry flag or the output nibble when using fill mode given that it either @c is_exclusive fill mode, or isn't;
/// and the specified initial @c carry and input @c nibble.
template <bool wants_carry> constexpr uint32_t fill_nibble(bool is_exclusive, uint8_t carry, uint8_t nibble) {
uint8_t fill_output = 0;
uint8_t bit = 0x01;
while(bit < 0x10) {
auto pre_toggle = nibble & bit, post_toggle = pre_toggle;
if(!is_exclusive) {
pre_toggle &= ~carry; // Accept bits that would transition to set immediately.
post_toggle &= carry; // Accept bits that would transition to clear after the fact.
} else {
post_toggle = 0; // Just do the pre-toggle.
}
carry ^= pre_toggle;
fill_output |= carry;
carry ^= post_toggle;
bit <<= 1;
carry <<= 1;
}
if constexpr (wants_carry) {
return carry >> 4;
} else {
return fill_output;
}
}
// Lookup key for these tables is:
//
// b0b3: input nibble
// b4: carry
// b5: is_exclusive
//
// i.e. it's in the range [0, 63].
//
// Tables below are indexed such that the higher-order bits select a table entry, lower-order bits select
// a bit or nibble from within the indexed item.
constexpr uint32_t fill_carries[] = {
(fill_nibble<true>(false, 0, 0x0) << 0x0) | (fill_nibble<true>(false, 0, 0x1) << 0x1) | (fill_nibble<true>(false, 0, 0x2) << 0x2) | (fill_nibble<true>(false, 0, 0x3) << 0x3) |
(fill_nibble<true>(false, 0, 0x4) << 0x4) | (fill_nibble<true>(false, 0, 0x5) << 0x5) | (fill_nibble<true>(false, 0, 0x6) << 0x6) | (fill_nibble<true>(false, 0, 0x7) << 0x7) |
(fill_nibble<true>(false, 0, 0x8) << 0x8) | (fill_nibble<true>(false, 0, 0x9) << 0x9) | (fill_nibble<true>(false, 0, 0xa) << 0xa) | (fill_nibble<true>(false, 0, 0xb) << 0xb) |
(fill_nibble<true>(false, 0, 0xc) << 0xc) | (fill_nibble<true>(false, 0, 0xd) << 0xd) | (fill_nibble<true>(false, 0, 0xe) << 0xe) | (fill_nibble<true>(false, 0, 0xf) << 0xf) |
(fill_nibble<true>(false, 1, 0x0) << 0x10) | (fill_nibble<true>(false, 1, 0x1) << 0x11) | (fill_nibble<true>(false, 1, 0x2) << 0x12) | (fill_nibble<true>(false, 1, 0x3) << 0x13) |
(fill_nibble<true>(false, 1, 0x4) << 0x14) | (fill_nibble<true>(false, 1, 0x5) << 0x15) | (fill_nibble<true>(false, 1, 0x6) << 0x16) | (fill_nibble<true>(false, 1, 0x7) << 0x17) |
(fill_nibble<true>(false, 1, 0x8) << 0x18) | (fill_nibble<true>(false, 1, 0x9) << 0x19) | (fill_nibble<true>(false, 1, 0xa) << 0x1a) | (fill_nibble<true>(false, 1, 0xb) << 0x1b) |
(fill_nibble<true>(false, 1, 0xc) << 0x1c) | (fill_nibble<true>(false, 1, 0xd) << 0x1d) | (fill_nibble<true>(false, 1, 0xe) << 0x1e) | (fill_nibble<true>(false, 1, 0xf) << 0x1f),
(fill_nibble<true>(true, 0, 0x0) << 0x0) | (fill_nibble<true>(true, 0, 0x1) << 0x1) | (fill_nibble<true>(true, 0, 0x2) << 0x2) | (fill_nibble<true>(true, 0, 0x3) << 0x3) |
(fill_nibble<true>(true, 0, 0x4) << 0x4) | (fill_nibble<true>(true, 0, 0x5) << 0x5) | (fill_nibble<true>(true, 0, 0x6) << 0x6) | (fill_nibble<true>(true, 0, 0x7) << 0x7) |
(fill_nibble<true>(true, 0, 0x8) << 0x8) | (fill_nibble<true>(true, 0, 0x9) << 0x9) | (fill_nibble<true>(true, 0, 0xa) << 0xa) | (fill_nibble<true>(true, 0, 0xb) << 0xb) |
(fill_nibble<true>(true, 0, 0xc) << 0xc) | (fill_nibble<true>(true, 0, 0xd) << 0xd) | (fill_nibble<true>(true, 0, 0xe) << 0xe) | (fill_nibble<true>(true, 0, 0xf) << 0xf) |
(fill_nibble<true>(true, 1, 0x0) << 0x10) | (fill_nibble<true>(true, 1, 0x1) << 0x11) | (fill_nibble<true>(true, 1, 0x2) << 0x12) | (fill_nibble<true>(true, 1, 0x3) << 0x13) |
(fill_nibble<true>(true, 1, 0x4) << 0x14) | (fill_nibble<true>(true, 1, 0x5) << 0x15) | (fill_nibble<true>(true, 1, 0x6) << 0x16) | (fill_nibble<true>(true, 1, 0x7) << 0x17) |
(fill_nibble<true>(true, 1, 0x8) << 0x18) | (fill_nibble<true>(true, 1, 0x9) << 0x19) | (fill_nibble<true>(true, 1, 0xa) << 0x1a) | (fill_nibble<true>(true, 1, 0xb) << 0x1b) |
(fill_nibble<true>(true, 1, 0xc) << 0x1c) | (fill_nibble<true>(true, 1, 0xd) << 0x1d) | (fill_nibble<true>(true, 1, 0xe) << 0x1e) | (fill_nibble<true>(true, 1, 0xf) << 0x1f),
};
constexpr uint32_t fill_values[] = {
(fill_nibble<false>(false, 0, 0x0) << 0) | (fill_nibble<false>(false, 0, 0x1) << 4) | (fill_nibble<false>(false, 0, 0x2) << 8) | (fill_nibble<false>(false, 0, 0x3) << 12) |
(fill_nibble<false>(false, 0, 0x4) << 16) | (fill_nibble<false>(false, 0, 0x5) << 20) | (fill_nibble<false>(false, 0, 0x6) << 24) | (fill_nibble<false>(false, 0, 0x7) << 28),
(fill_nibble<false>(false, 0, 0x8) << 0) | (fill_nibble<false>(false, 0, 0x9) << 4) | (fill_nibble<false>(false, 0, 0xa) << 8) | (fill_nibble<false>(false, 0, 0xb) << 12) |
(fill_nibble<false>(false, 0, 0xc) << 16) | (fill_nibble<false>(false, 0, 0xd) << 20) | (fill_nibble<false>(false, 0, 0xe) << 24) | (fill_nibble<false>(false, 0, 0xf) << 28),
(fill_nibble<false>(false, 1, 0x0) << 0) | (fill_nibble<false>(false, 1, 0x1) << 4) | (fill_nibble<false>(false, 1, 0x2) << 8) | (fill_nibble<false>(false, 1, 0x3) << 12) |
(fill_nibble<false>(false, 1, 0x4) << 16) | (fill_nibble<false>(false, 1, 0x5) << 20) | (fill_nibble<false>(false, 1, 0x6) << 24) | (fill_nibble<false>(false, 1, 0x7) << 28),
(fill_nibble<false>(false, 1, 0x8) << 0) | (fill_nibble<false>(false, 1, 0x9) << 4) | (fill_nibble<false>(false, 1, 0xa) << 8) | (fill_nibble<false>(false, 1, 0xb) << 12) |
(fill_nibble<false>(false, 1, 0xc) << 16) | (fill_nibble<false>(false, 1, 0xd) << 20) | (fill_nibble<false>(false, 1, 0xe) << 24) | (fill_nibble<false>(false, 1, 0xf) << 28),
(fill_nibble<false>(true, 0, 0x0) << 0) | (fill_nibble<false>(true, 0, 0x1) << 4) | (fill_nibble<false>(true, 0, 0x2) << 8) | (fill_nibble<false>(true, 0, 0x3) << 12) |
(fill_nibble<false>(true, 0, 0x4) << 16) | (fill_nibble<false>(true, 0, 0x5) << 20) | (fill_nibble<false>(true, 0, 0x6) << 24) | (fill_nibble<false>(true, 0, 0x7) << 28),
(fill_nibble<false>(true, 0, 0x8) << 0) | (fill_nibble<false>(true, 0, 0x9) << 4) | (fill_nibble<false>(true, 0, 0xa) << 8) | (fill_nibble<false>(true, 0, 0xb) << 12) |
(fill_nibble<false>(true, 0, 0xc) << 16) | (fill_nibble<false>(true, 0, 0xd) << 20) | (fill_nibble<false>(true, 0, 0xe) << 24) | (fill_nibble<false>(true, 0, 0xf) << 28),
(fill_nibble<false>(true, 1, 0x0) << 0) | (fill_nibble<false>(true, 1, 0x1) << 4) | (fill_nibble<false>(true, 1, 0x2) << 8) | (fill_nibble<false>(true, 1, 0x3) << 12) |
(fill_nibble<false>(true, 1, 0x4) << 16) | (fill_nibble<false>(true, 1, 0x5) << 20) | (fill_nibble<false>(true, 1, 0x6) << 24) | (fill_nibble<false>(true, 1, 0x7) << 28),
(fill_nibble<false>(true, 1, 0x8) << 0) | (fill_nibble<false>(true, 1, 0x9) << 4) | (fill_nibble<false>(true, 1, 0xa) << 8) | (fill_nibble<false>(true, 1, 0xb) << 12) |
(fill_nibble<false>(true, 1, 0xc) << 16) | (fill_nibble<false>(true, 1, 0xd) << 20) | (fill_nibble<false>(true, 1, 0xe) << 24) | (fill_nibble<false>(true, 1, 0xf) << 28),
};
}
void Blitter::set_control(int index, uint16_t value) {
if(index) {
line_mode_ = (value & 0x0001);
@ -270,36 +362,20 @@ bool Blitter::advance_dma() {
c_data_,
minterms_);
// TODO: don't be so dense as below. This is the initial
// does-it-pass-the-tests? version.
if(exclusive_fill_ || inclusive_fill_) {
// Use the fill tables nibble-by-nibble to figure out the filled word.
uint16_t fill_output = 0;
uint16_t bit = one_dot_ ? 0x0001 : 0x8000;
uint16_t flag = fill_carry ? bit : 0x0000;
while(bit) {
uint16_t pre_toggle = output & bit, post_toggle = pre_toggle;
if(inclusive_fill_) {
pre_toggle &= ~flag; // Accept bits that would transition to set immediately.
post_toggle &= flag; // Accept bits that would transition to clear after the fact.
} else {
post_toggle = 0; // Just do the pre toggle.
}
flag ^= pre_toggle;
fill_output |= flag;
flag ^= post_toggle;
fill_carry = flag;
if(one_dot_) {
bit <<= 1;
flag <<= 1;
} else {
bit >>= 1;
flag >>= 1;
}
int ongoing_carry = fill_carry;
const int type_mask = exclusive_fill_ ? (1 << 5) : 0;
for(int c = 0; c < 16; c += 4) {
const int total_index = (output & 0xf) | (ongoing_carry << 4) | type_mask;
fill_output |= ((fill_values[total_index >> 3] >> ((total_index & 7) * 4)) & 0xf) << c;
ongoing_carry = (fill_carries[total_index >> 5] >> (total_index & 31)) & 1;
output >>= 4;
}
output = fill_output;
fill_carry = ongoing_carry;
}
not_zero_flag_ |= output;