mirror of
https://github.com/TomHarte/CLK.git
synced 2024-07-09 06:29:33 +00:00
Merge pull request #641 from TomHarte/DIVSTiming
Substantially improves DIVS timing.
This commit is contained in:
commit
c6cf0e914b
@ -154,25 +154,15 @@ template <Analyser::Static::Macintosh::Target::Model model> class ConcreteMachin
|
|||||||
|
|
||||||
using Microcycle = CPU::MC68000::Microcycle;
|
using Microcycle = CPU::MC68000::Microcycle;
|
||||||
|
|
||||||
HalfCycles perform_bus_operation(const Microcycle &cycle, int is_supervisor) {
|
forceinline HalfCycles perform_bus_operation(const Microcycle &cycle, int is_supervisor) {
|
||||||
HalfCycles delay(0);
|
|
||||||
|
|
||||||
// Grab the word-precision address being accessed.
|
|
||||||
uint32_t word_address = 0;
|
|
||||||
|
|
||||||
// Take a sneak peek and add a delay if this is a RAM access that would overlap with video.
|
|
||||||
if(cycle.data_select_active()) {
|
|
||||||
word_address = cycle.active_operation_word_address();
|
|
||||||
if(memory_map_[word_address >> 18] == BusDevice::RAM && ram_subcycle_ < 4) {
|
|
||||||
delay = HalfCycles(4 - ram_subcycle_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Advance time.
|
// Advance time.
|
||||||
advance_time(cycle.length + delay);
|
advance_time(cycle.length);
|
||||||
|
|
||||||
// A null cycle leaves nothing else to do.
|
// A null cycle leaves nothing else to do.
|
||||||
if(!(cycle.operation & (Microcycle::NewAddress | Microcycle::SameAddress))) return delay;
|
if(!(cycle.operation & (Microcycle::NewAddress | Microcycle::SameAddress))) return HalfCycles(0);
|
||||||
|
|
||||||
|
// Grab the value on the address bus, at word precision.
|
||||||
|
uint32_t word_address = cycle.active_operation_word_address();
|
||||||
|
|
||||||
// Everything above E0 0000 is signalled as being on the peripheral bus.
|
// Everything above E0 0000 is signalled as being on the peripheral bus.
|
||||||
mc68000_.set_is_peripheral_address(word_address >= 0x700000);
|
mc68000_.set_is_peripheral_address(word_address >= 0x700000);
|
||||||
@ -185,9 +175,11 @@ template <Analyser::Static::Macintosh::Target::Model model> class ConcreteMachin
|
|||||||
// having set VPA above deals with those given that the generated address
|
// having set VPA above deals with those given that the generated address
|
||||||
// for interrupt acknowledge cycles always has all bits set except the
|
// for interrupt acknowledge cycles always has all bits set except the
|
||||||
// lowest explicit address lines.
|
// lowest explicit address lines.
|
||||||
if(!cycle.data_select_active() || (cycle.operation & Microcycle::InterruptAcknowledge)) return delay;
|
if(!cycle.data_select_active() || (cycle.operation & Microcycle::InterruptAcknowledge)) return HalfCycles(0);
|
||||||
|
|
||||||
|
// Grab the word-precision address being accessed.
|
||||||
uint16_t *memory_base = nullptr;
|
uint16_t *memory_base = nullptr;
|
||||||
|
HalfCycles delay;
|
||||||
switch(memory_map_[word_address >> 18]) {
|
switch(memory_map_[word_address >> 18]) {
|
||||||
default: assert(false);
|
default: assert(false);
|
||||||
|
|
||||||
@ -289,6 +281,15 @@ template <Analyser::Static::Macintosh::Target::Model model> class ConcreteMachin
|
|||||||
|
|
||||||
memory_base = ram_;
|
memory_base = ram_;
|
||||||
word_address &= ram_mask_;
|
word_address &= ram_mask_;
|
||||||
|
|
||||||
|
// Apply a delay due to video contention if applicable; technically this is
|
||||||
|
// incorrectly placed — strictly speaking here I'm extending the part of the
|
||||||
|
// bus cycle after DTACK rather than delaying DTACK. But it adds up to the
|
||||||
|
// same thing.
|
||||||
|
if(ram_subcycle_ < 4) {
|
||||||
|
delay = HalfCycles(4 - ram_subcycle_);
|
||||||
|
advance_time(delay);
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case BusDevice::ROM: {
|
case BusDevice::ROM: {
|
||||||
|
@ -1,9 +1,6 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<Workspace
|
<Workspace
|
||||||
version = "1.0">
|
version = "1.0">
|
||||||
<FileRef
|
|
||||||
location = "group:/Users/thomasharte/Projects/CLK/OSBindings/Mac/Clock SignalTests/68000ArithmeticTests.mm">
|
|
||||||
</FileRef>
|
|
||||||
<FileRef
|
<FileRef
|
||||||
location = "self:Clock Signal.xcodeproj">
|
location = "self:Clock Signal.xcodeproj">
|
||||||
</FileRef>
|
</FileRef>
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1049,51 +1049,56 @@ template <class T, bool dtack_is_implicit, bool signal_will_perform> void Proces
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t dividend = int32_t(destination()->full);
|
const int32_t signed_dividend = int32_t(destination()->full);
|
||||||
int32_t divisor = s_extend16(source()->halves.low.full);
|
const int32_t signed_divisor = s_extend16(source()->halves.low.full);
|
||||||
const int64_t quotient = int64_t(dividend) / int64_t(divisor);
|
const auto result_sign =
|
||||||
|
( (0 <= signed_dividend) - (signed_dividend < 0) ) *
|
||||||
|
( (0 <= signed_divisor) - (signed_divisor < 0) );
|
||||||
|
|
||||||
|
const uint32_t dividend = uint32_t(abs(signed_dividend));
|
||||||
|
const uint32_t divisor = uint32_t(abs(signed_divisor));
|
||||||
|
|
||||||
int cycles_expended = 12; // Covers the nn nnn n to get beyond the sign test.
|
int cycles_expended = 12; // Covers the nn nnn n to get beyond the sign test.
|
||||||
if(dividend < 0) {
|
if(signed_dividend < 0) {
|
||||||
cycles_expended += 2; // An additional microcycle applies if the dividend is negative.
|
cycles_expended += 2; // An additional microcycle applies if the dividend is negative.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for overflow. If it exists, work here is already done.
|
// Check for overflow. If it exists, work here is already done.
|
||||||
if(quotient > 32767 || quotient < -32768) {
|
const auto quotient = dividend / divisor;
|
||||||
|
if(quotient > 32767) {
|
||||||
overflow_flag_ = 1;
|
overflow_flag_ = 1;
|
||||||
set_next_microcycle_length(HalfCycles(3*2*2));
|
set_next_microcycle_length(HalfCycles(6*2*2));
|
||||||
|
|
||||||
// These are officially undefined for results that overflow, so the below is a guess.
|
// These are officially undefined for results that overflow, so the below is a guess.
|
||||||
zero_result_ = decltype(zero_result_)(divisor & 0xffff);
|
zero_result_ = decltype(zero_result_)(dividend);
|
||||||
negative_flag_ = zero_result_ & 0x8000;
|
negative_flag_ = zero_result_ & 0x8000;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
zero_result_ = decltype(zero_result_)(quotient);
|
const uint16_t remainder = uint16_t(signed_dividend % signed_divisor);
|
||||||
|
const int signed_quotient = result_sign*int(quotient);
|
||||||
|
destination()->halves.high.full = remainder;
|
||||||
|
destination()->halves.low.full = uint16_t(signed_quotient);
|
||||||
|
|
||||||
|
zero_result_ = decltype(zero_result_)(signed_quotient);
|
||||||
negative_flag_ = zero_result_ & 0x8000;
|
negative_flag_ = zero_result_ & 0x8000;
|
||||||
overflow_flag_ = 0;
|
overflow_flag_ = 0;
|
||||||
|
|
||||||
// TODO: check sign rules here; am I necessarily giving the remainder the correct sign?
|
// Algorithm here: there is a fixed cost per unset bit
|
||||||
// (and, if not, am I counting it in the correct direction?)
|
// in the first 15 bits of the unsigned quotient.
|
||||||
const uint16_t remainder = uint16_t(dividend % divisor);
|
auto positive_quotient_bits = ~quotient & 0xfffe;
|
||||||
destination()->halves.high.full = remainder;
|
|
||||||
destination()->halves.low.full = uint16_t(quotient);
|
|
||||||
|
|
||||||
// Algorithm here: there is a fixed three-microcycle cost per bit set
|
|
||||||
// in the unsigned quotient; there is an additional microcycle for
|
|
||||||
// every bit that is set. Also, since the possibility of overflow
|
|
||||||
// was already dealt with, it's now a smaller number.
|
|
||||||
int positive_quotient_bits = int(abs(quotient)) & 0xfffe;
|
|
||||||
convert_to_bit_count_16(positive_quotient_bits);
|
convert_to_bit_count_16(positive_quotient_bits);
|
||||||
cycles_expended += 2 * positive_quotient_bits;
|
cycles_expended += 2 * positive_quotient_bits;
|
||||||
|
|
||||||
// There's then no way to terminate the loop that isn't at least six cycles long.
|
// There's then no way to terminate the loop that isn't at least ten cycles long;
|
||||||
cycles_expended += 6;
|
// there's also a fixed overhead per bit. The two together add up to the 104 below.
|
||||||
|
cycles_expended += 104;
|
||||||
|
|
||||||
if(divisor < 0) {
|
// This picks up at 'No more bits' in yacht.txt's diagram.
|
||||||
|
if(signed_divisor < 0) {
|
||||||
cycles_expended += 2;
|
cycles_expended += 2;
|
||||||
} else if(dividend < 0) {
|
} else if(signed_dividend < 0) {
|
||||||
cycles_expended += 4;
|
cycles_expended += 4;
|
||||||
}
|
}
|
||||||
set_next_microcycle_length(HalfCycles(cycles_expended * 2));
|
set_next_microcycle_length(HalfCycles(cycles_expended * 2));
|
||||||
|
Loading…
Reference in New Issue
Block a user