From e746637bee001affafe162b66b373882583e382c Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 27 May 2022 11:12:10 -0400
Subject: [PATCH 1/6] Fill in dynamic cost of shifts.

---
 Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
index 921b182b9..4a6265414 100644
--- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
+++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
@@ -2539,8 +2539,8 @@ template <typename IntT> void ProcessorBase::did_muls(IntT) {
 	// TODO: calculate cost.
 }
 
-void ProcessorBase::did_shift(int) {
-	// TODO: calculate cost.
+void ProcessorBase::did_shift(int bits_shifted) {
+	dynamic_instruction_length_ = bits_shifted;
 }
 
 template <bool use_current_instruction_pc> void ProcessorBase::raise_exception(int vector) {

From 165ebe8ae31a186042c4e43c72010bb22d1cbd27 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 27 May 2022 14:41:42 -0400
Subject: [PATCH 2/6] Add time calculation for MULU and MULS.

---
 .../Implementation/68000Mk2Implementation.hpp | 23 +++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
index 4a6265414..2cd80f9e5 100644
--- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
+++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
@@ -2531,14 +2531,29 @@ template <bool did_overflow> void ProcessorBase::did_divs(int32_t, int32_t) {
 	// TODO: calculate cost.
 }
 
-template <typename IntT> void ProcessorBase::did_mulu(IntT) {
-	// TODO: calculate cost.
+#define convert_to_bit_count_16(x)			\
+	x = ((x & 0xaaaa) >> 1) + (x & 0x5555);	\
+	x = ((x & 0xcccc) >> 2) + (x & 0x3333);	\
+	x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f);	\
+	x = ((x & 0xff00) >> 8) + (x & 0x00ff);
+
+template <typename IntT> void ProcessorBase::did_mulu(IntT multiplier) {
+	// Count number of bits set.
+	convert_to_bit_count_16(multiplier);
+	dynamic_instruction_length_ = multiplier;
 }
 
-template <typename IntT> void ProcessorBase::did_muls(IntT) {
-	// TODO: calculate cost.
+template <typename IntT> void ProcessorBase::did_muls(IntT multiplier) {
+	// Count number of transitions from 0 to 1 or from 1 to 0 — i.e. the
+	// number of times that a bit is not equal to the one to its right.
+	// Treat the bit to the right of b0 as 0.
+	int number_of_pairs = (multiplier ^ (multiplier << 1)) & 0xffff;
+	convert_to_bit_count_16(number_of_pairs);
+	dynamic_instruction_length_ = number_of_pairs;
 }
 
+#undef convert_to_bit_count_16
+
 void ProcessorBase::did_shift(int bits_shifted) {
 	dynamic_instruction_length_ = bits_shifted;
 }

From e11990e4533ba46443cd980c7d49bf5404335d98 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 27 May 2022 14:56:04 -0400
Subject: [PATCH 3/6] Make an attempt at DIVS timing.

---
 .../Implementation/68000Mk2Implementation.hpp | 85 +++++++++++++++++--
 1 file changed, 80 insertions(+), 5 deletions(-)

diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
index 2cd80f9e5..a9ddfb43e 100644
--- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
+++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
@@ -2523,12 +2523,44 @@ void ProcessorBase::did_bit_op(int bit_position) {
 	dynamic_instruction_length_ = int(bit_position > 15);
 }
 
-template <bool did_overflow> void ProcessorBase::did_divu(uint32_t, uint32_t) {
-	// TODO: calculate cost.
-}
+template <bool did_overflow> void ProcessorBase::did_divu(uint32_t dividend, uint32_t divisor) {
+	if(!divisor) {
+		dynamic_instruction_length_ = 4;	// nn nn precedes the usual exception activity.
+		return;
+	}
 
-template <bool did_overflow> void ProcessorBase::did_divs(int32_t, int32_t) {
-	// TODO: calculate cost.
+	if(did_overflow) {
+		dynamic_instruction_length_ = 3;	// Just a quick nn n, and then on to prefetch.
+		return;
+	}
+
+	// Calculate cost; this is based on the flowchart in yacht.txt.
+	// I could actually calculate the division result using this code,
+	// since this is a classic divide algorithm, but would rather that
+	// errors produce incorrect timing only, not incorrect timing plus
+	// incorrect results.
+	dynamic_instruction_length_ = 3;	// Covers the nn n to get into the loop.
+
+	divisor <<= 16;
+	for(int c = 0; c < 15; ++c) {
+		if(dividend & 0x80000000) {
+			dividend = (dividend << 1) - divisor;
+			dynamic_instruction_length_ += 2;	// The fixed nn iteration cost.
+		} else {
+			dividend <<= 1;
+
+			// Yacht.txt, and indeed a real microprogram, would just subtract here
+			// and test the sign of the result, but this is easier to follow:
+			if (dividend >= divisor) {
+				dividend -= divisor;
+				dynamic_instruction_length_ += 3;	// i.e. the original nn plus one further n before going down the MSB=0 route.
+			} else {
+				dynamic_instruction_length_ += 4;	// The costliest path (since in real life it's a subtraction and then a step
+				// back from there) — all costs accrue. So the fixed nn loop plus another n,
+				// plus another one.
+			}
+		}
+	}
 }
 
 #define convert_to_bit_count_16(x)			\
@@ -2537,6 +2569,49 @@ template <bool did_overflow> void ProcessorBase::did_divs(int32_t, int32_t) {
 	x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f);	\
 	x = ((x & 0xff00) >> 8) + (x & 0x00ff);
 
+template <bool did_overflow> void ProcessorBase::did_divs(int32_t dividend, int32_t divisor) {
+	// The route to spotting divide by 0 is just nn nn.
+	if(!divisor) {
+		dynamic_instruction_length_ = 4;	// nn nn precedes the usual exception activity.
+		return;
+	}
+
+	// It's either five or six microcycles to get into the main loop, depending
+	// on dividend sign.
+	dynamic_instruction_length_ = 5 + (dividend < 0);
+
+	if(did_overflow) {
+		return;
+	}
+
+	// There's always a cost of four microcycles per bit, plus an additional
+	// one for each that is non-zero.
+	//
+	// The sign bit does not count here; it's the low fifteen bits that matter
+	// only, in the unsigned version of the result.
+	dynamic_instruction_length_ += 60;
+
+	int result_bits = abs(dividend / divisor) & 0x7fff;
+	convert_to_bit_count_16(result_bits);
+	dynamic_instruction_length_ += result_bits;
+
+	// Determine the tail cost; a divisor of less than 0 leads to one exit,
+	// a divisor of greater than zero makes the result a function of the
+	// sign of the dividend.
+	//
+	// In all cases, this is counting from 'No more bits' in the Yacht diagram.
+	if(divisor < 0) {
+		dynamic_instruction_length_ += 4;
+		return;
+	}
+
+	if(dividend < 0) {
+		dynamic_instruction_length_ += 5;
+	} else {
+		dynamic_instruction_length_ += 3;
+	}
+}
+
 template <typename IntT> void ProcessorBase::did_mulu(IntT multiplier) {
 	// Count number of bits set.
 	convert_to_bit_count_16(multiplier);

From e8dd8215bab21a2057ee63fb57dc1541304c0cca Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 27 May 2022 15:37:40 -0400
Subject: [PATCH 4/6] Tweak per empirical results.

---
 .../Implementation/68000Mk2Implementation.hpp  | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
index a9ddfb43e..c37276954 100644
--- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
+++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
@@ -2576,22 +2576,26 @@ template <bool did_overflow> void ProcessorBase::did_divs(int32_t dividend, int3
 		return;
 	}
 
-	// It's either five or six microcycles to get into the main loop, depending
+	// It's either six or seven microcycles to get into the main loop, depending
 	// on dividend sign.
-	dynamic_instruction_length_ = 5 + (dividend < 0);
+	dynamic_instruction_length_ = 6 + (dividend < 0);
 
 	if(did_overflow) {
 		return;
 	}
 
-	// There's always a cost of four microcycles per bit, plus an additional
-	// one for each that is non-zero.
+	// There's a fixed cost per bit, plus an additional one for each that is zero.
 	//
-	// The sign bit does not count here; it's the low fifteen bits that matter
+	// The sign bit does not count here; it's the high fifteen bits that matter
 	// only, in the unsigned version of the result.
-	dynamic_instruction_length_ += 60;
+	//
+	// Disclaimer: per the flowchart it looks to me like this constant should be 60
+	// rather than 49 — four microcycles per bit. But the number 49 makes this
+	// algorithm exactly fit the stated minimum and maximum costs. Possibly the
+	// undefined difference between a nop cycle and an idle wait is relevant here?
+	dynamic_instruction_length_ += 49;
 
-	int result_bits = abs(dividend / divisor) & 0x7fff;
+	int result_bits = ~abs(dividend / divisor) & 0xfffe;
 	convert_to_bit_count_16(result_bits);
 	dynamic_instruction_length_ += result_bits;
 

From d17d77714fdec27e35ca6e43fe961dec1602fedb Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 27 May 2022 15:40:06 -0400
Subject: [PATCH 5/6] Remove outdated TODO.

---
 Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
index c37276954..d2b9024ec 100644
--- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
+++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
@@ -2443,8 +2443,6 @@ void Processor<BusHandler, dtack_is_implicit, permit_overrun, signal_will_perfor
 			instruction_address_.l += 2;	// Push the address of the instruction after the trap.
 		RaiseException(InstructionSet::M68k::Exception::TRAPV);
 
-#undef TODOState
-
 		default:
 			printf("Unhandled state: %d; opcode is %04x\n", state_, opcode_);
 			assert(false);

From 35e73b77f410223acbf499c392e367d81d51a76e Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 27 May 2022 21:54:23 -0400
Subject: [PATCH 6/6] Fix interrupt stack frame.

---
 .../Implementation/68000Mk2Implementation.hpp         | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
index d2b9024ec..e75ad731e 100644
--- a/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
+++ b/Processors/68000Mk2/Implementation/68000Mk2Implementation.hpp
@@ -526,9 +526,9 @@ void Processor<BusHandler, dtack_is_implicit, permit_overrun, signal_will_perfor
 			SetupDataAccess(0, Microcycle::SelectWord);
 			SetDataAddress(registers_[15].l);
 
-			// Push status.
+			// Push low part of program counter.
 			registers_[15].l -= 2;
-			Access(captured_status_);			// ns
+			Access(instruction_address_.low);			// ns
 
 			// Do the interrupt cycle, to obtain a vector.
 			temporary_address_.l = 0xffff'fff1 | uint32_t(captured_interrupt_level_ << 1);
@@ -549,11 +549,12 @@ void Processor<BusHandler, dtack_is_implicit, permit_overrun, signal_will_perfor
 			SetupDataAccess(0, Microcycle::SelectWord);
 			SetDataAddress(registers_[15].l);
 
-			registers_[15].l -= 2;
-			Access(instruction_address_.high);	// ns
+			registers_[15].l -= 4;
+			Access(captured_status_);			// ns
 
+			registers_[15].l += 2;
+			Access(instruction_address_.high);	// nS
 			registers_[15].l -= 2;
-			Access(instruction_address_.low);	// nS
 
 			// Grab new program counter.
 			SetupDataAccess(Microcycle::Read, Microcycle::SelectWord);