From e746637bee001affafe162b66b373882583e382c Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Fri, 27 May 2022 11:12:10 -0400 Subject: [PATCH 1/6] Fill in dynamic cost of shifts. --- Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp index 921b182b9..4a6265414 100644 --- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp +++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp @@ -2539,8 +2539,8 @@ template void ProcessorBase::did_muls(IntT) { // TODO: calculate cost. } -void ProcessorBase::did_shift(int) { - // TODO: calculate cost. +void ProcessorBase::did_shift(int bits_shifted) { + dynamic_instruction_length_ = bits_shifted; } template void ProcessorBase::raise_exception(int vector) { From 165ebe8ae31a186042c4e43c72010bb22d1cbd27 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Fri, 27 May 2022 14:41:42 -0400 Subject: [PATCH 2/6] Add time calculation for MULU and MULS. --- .../Implementation/68000Mk2Implementation.hpp | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp index 4a6265414..2cd80f9e5 100644 --- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp +++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp @@ -2531,14 +2531,29 @@ template void ProcessorBase::did_divs(int32_t, int32_t) { // TODO: calculate cost. } -template void ProcessorBase::did_mulu(IntT) { - // TODO: calculate cost. 
+#define convert_to_bit_count_16(x) \ + x = ((x & 0xaaaa) >> 1) + (x & 0x5555); \ + x = ((x & 0xcccc) >> 2) + (x & 0x3333); \ + x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); \ + x = ((x & 0xff00) >> 8) + (x & 0x00ff); + +template void ProcessorBase::did_mulu(IntT multiplier) { + // Count number of bits set. + convert_to_bit_count_16(multiplier); + dynamic_instruction_length_ = multiplier; } -template void ProcessorBase::did_muls(IntT) { - // TODO: calculate cost. +template void ProcessorBase::did_muls(IntT multiplier) { + // Count number of transitions from 0 to 1 or from 1 to 0 — i.e. the + // number of times that a bit is not equal to the one to its right. + // Treat the bit to the right of b0 as 0. + int number_of_pairs = (multiplier ^ (multiplier << 1)) & 0xffff; + convert_to_bit_count_16(number_of_pairs); + dynamic_instruction_length_ = number_of_pairs; } +#undef convert_to_bit_count_16 + void ProcessorBase::did_shift(int bits_shifted) { dynamic_instruction_length_ = bits_shifted; } From e11990e4533ba46443cd980c7d49bf5404335d98 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Fri, 27 May 2022 14:56:04 -0400 Subject: [PATCH 3/6] Make an attempt at DIVS timing. --- .../Implementation/68000Mk2Implementation.hpp | 85 +++++++++++++++++-- 1 file changed, 80 insertions(+), 5 deletions(-) diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp index 2cd80f9e5..a9ddfb43e 100644 --- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp +++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp @@ -2523,12 +2523,44 @@ void ProcessorBase::did_bit_op(int bit_position) { dynamic_instruction_length_ = int(bit_position > 15); } -template void ProcessorBase::did_divu(uint32_t, uint32_t) { - // TODO: calculate cost. 
-} +template void ProcessorBase::did_divu(uint32_t dividend, uint32_t divisor) { + if(!divisor) { + dynamic_instruction_length_ = 4; // nn nn precedes the usual exception activity. + return; + } -template void ProcessorBase::did_divs(int32_t, int32_t) { - // TODO: calculate cost. + if(did_overflow) { + dynamic_instruction_length_ = 3; // Just a quick nn n, and then on to prefetch. + return; + } + + // Calculate cost; this is based on the flowchart in yacht.txt. + // I could actually calculate the division result using this code, + // since this is a classic divide algorithm, but would rather that + // errors produce incorrect timing only, not incorrect timing plus + // incorrect results. + dynamic_instruction_length_ = 3; // Covers the nn n to get into the loop. + + divisor <<= 16; + for(int c = 0; c < 15; ++c) { + if(dividend & 0x80000000) { + dividend = (dividend << 1) - divisor; + dynamic_instruction_length_ += 2; // The fixed nn iteration cost. + } else { + dividend <<= 1; + + // Yacht.txt, and indeed a real microprogram, would just subtract here + // and test the sign of the result, but this is easier to follow: + if (dividend >= divisor) { + dividend -= divisor; + dynamic_instruction_length_ += 3; // i.e. the original nn plus one further n before going down the MSB=0 route. + } else { + dynamic_instruction_length_ += 4; // The costliest path (since in real life it's a subtraction and then a step + // back from there) — all costs accrue. So the fixed nn loop plus another n, + // plus another one. + } + } + } } #define convert_to_bit_count_16(x) \ @@ -2537,6 +2569,49 @@ template void ProcessorBase::did_divs(int32_t, int32_t) { x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); \ x = ((x & 0xff00) >> 8) + (x & 0x00ff); +template void ProcessorBase::did_divs(int32_t dividend, int32_t divisor) { + // The route to spotting divide by 0 is just nn nn. + if(!divisor) { + dynamic_instruction_length_ = 4; // nn nn precedes the usual exception activity. 
+ return; + } + + // It's either five or six microcycles to get into the main loop, depending + // on dividend sign. + dynamic_instruction_length_ = 5 + (dividend < 0); + + if(did_overflow) { + return; + } + + // There's always a cost of four microcycles per bit, plus an additional + // one for each that is non-zero. + // + // The sign bit does not count here; it's the low fifteen bits that matter + // only, in the unsigned version of the result. + dynamic_instruction_length_ += 60; + + int result_bits = abs(dividend / divisor) & 0x7fff; + convert_to_bit_count_16(result_bits); + dynamic_instruction_length_ += result_bits; + + // Determine the tail cost; a divisor of less than 0 leads to one exit, + // a divisor of greater than zero makes the result a function of the + // sign of the dividend. + // + // In all cases, this is counting from 'No more bits' in the Yacht diagram. + if(divisor < 0) { + dynamic_instruction_length_ += 4; + return; + } + + if(dividend < 0) { + dynamic_instruction_length_ += 5; + } else { + dynamic_instruction_length_ += 3; + } +} + template void ProcessorBase::did_mulu(IntT multiplier) { // Count number of bits set. convert_to_bit_count_16(multiplier); From e8dd8215bab21a2057ee63fb57dc1541304c0cca Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Fri, 27 May 2022 15:37:40 -0400 Subject: [PATCH 4/6] Tweak per empirical results. 
--- .../Implementation/68000Mk2Implementation.hpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp index a9ddfb43e..c37276954 100644 --- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp +++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp @@ -2576,22 +2576,26 @@ template void ProcessorBase::did_divs(int32_t dividend, int3 return; } - // It's either five or six microcycles to get into the main loop, depending + // It's either six or seven microcycles to get into the main loop, depending // on dividend sign. - dynamic_instruction_length_ = 5 + (dividend < 0); + dynamic_instruction_length_ = 6 + (dividend < 0); if(did_overflow) { return; } - // There's always a cost of four microcycles per bit, plus an additional - // one for each that is non-zero. + // There's a fixed cost per bit, plus an additional one for each that is zero. // - // The sign bit does not count here; it's the low fifteen bits that matter + // The sign bit does not count here; it's the high fifteen bits that matter // only, in the unsigned version of the result. - dynamic_instruction_length_ += 60; + // + // Disclaimer: per the flowchart it looks to me like this constant should be 60 + // rather than 49 — four microcycles per bit. But the number 49 makes this + // algorithm exactly fit the stated minimum and maximum costs. Possibly the + // undefined difference between a nop cycle and an idle wait is relevant here? + dynamic_instruction_length_ += 49; - int result_bits = abs(dividend / divisor) & 0x7fff; + int result_bits = ~abs(dividend / divisor) & 0xfffe; convert_to_bit_count_16(result_bits); dynamic_instruction_length_ += result_bits; From d17d77714fdec27e35ca6e43fe961dec1602fedb Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Fri, 27 May 2022 15:40:06 -0400 Subject: [PATCH 5/6] Remove outdated TODO. 
--- Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp index c37276954..d2b9024ec 100644 --- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp +++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp @@ -2443,8 +2443,6 @@ void Processor Date: Fri, 27 May 2022 21:54:23 -0400 Subject: [PATCH 6/6] Fix interrupt stack frame. --- .../Implementation/68000Mk2Implementation.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp index d2b9024ec..e75ad731e 100644 --- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp +++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp @@ -526,9 +526,9 @@ void Processor