macsehw/hardware/fpga/bbu/bbu.v

1606 lines
59 KiB
Verilog

/* Synthesizable Verilog hardware description for a drop-in
replacement of the Apple Custom Silicon Bob Bailey Unit (BBU), an
address controller for the Macintosh SE and similar computers.
Written in 2020 by Andrew Makousky
Public Domain Dedication:
To the extent possible under law, the author(s) have dedicated all
copyright and related and neighboring rights to this software to
the public domain worldwide. This software is distributed without
any warranty.
You should have received a copy of the CC0 Public Domain Dedication
along with this software. If not, see
<http://creativecommons.org/publicdomain/zero/1.0/>.
*/
`ifndef BBU_V
`define BBU_V
`include "common.vh"
// NOTE: These constants are defined in lower-case because they are
// meant to be treated principally as if they are hard-wired
// registers.
// Table of total supported RAM sizes, used by the RAM row refresh
// circuitry. Note that although hardware only has 23 address
// lines, and only 21 address lines are ever used for RAM, we
// define these registers as 24 address lines solely for Verilog
// source code readability.
`define ramsz_128k 24'h20000
`define ramsz_256k 24'h40000
`define ramsz_512k 24'h80000
`define ramsz_1m 24'h100000
`define ramsz_2m 24'h200000
`define ramsz_2_5m 24'h280000
`define ramsz_4m 24'h400000
// Enumerant values and "shift register indices" for RAM size.
`define RAMSZ_EN_128K 7'b0000001
`define RAMSZ_ENI_128K 0
`define RAMSZ_EN_256K 7'b0000010
`define RAMSZ_ENI_256K 1
`define RAMSZ_EN_512K 7'b0000100
`define RAMSZ_ENI_512K 2
`define RAMSZ_EN_1M 7'b0001000
`define RAMSZ_ENI_1M 3
`define RAMSZ_EN_2M 7'b0010000
`define RAMSZ_ENI_2M 4
`define RAMSZ_EN_2_5M 7'b0100000
`define RAMSZ_ENI_2_5M 5
`define RAMSZ_EN_4M 7'b1000000
`define RAMSZ_ENI_4M 6
// Please note: If we don't list the configuration in one of the
// following the tables, it's not supported by the BBU. The BBU is a
// gate array, not a microcontroller!
// The main and alternate screen buffer memory addresses are
// calculated by subtracting a constant from the installed RAM size.
// Deltas: main -0x5900, alt. -0xd900.
// Computed values for reference:
// 128K: main 0x1a700 alt. 0x12700.
// 256K: main 0x3a700, alt 0x32700.
// 512K: main 0x7a700, alt. 0x72700.
// 1MB: main 0xfa700, alt 0xf2700.
// 2MB: main 0x1fa700, alt 0x1f2700.
// 2.5MB: main 0x27a700, alt 0x272700.
// 4MB: main 0x3fa700, alt 0x3f2700.
// Table of video memory base addresses.
`define vid_main_addr_128k 24'h1a700
`define vid_alt_addr_128k 24'h12700
`define vid_main_addr_256k 24'h3a700
`define vid_alt_addr_256k 24'h32700
`define vid_main_addr_512k 24'h7a700
`define vid_alt_addr_512k 24'h72700
`define vid_main_addr_1m 24'hfa700
`define vid_alt_addr_1m 24'hf2700
`define vid_main_addr_2m 24'h1fa700
`define vid_alt_addr_2m 24'h1f2700
`define vid_main_addr_2_5m 24'h27a700
`define vid_alt_addr_2_5m 24'h272700
`define vid_main_addr_4m 24'h3fa700
`define vid_alt_addr_4m 24'h3f2700
// The main and alternate sound and disk speed buffer addresses are
// calculated by subtracting a constant from the installed RAM size.
// Deltas: main -0x0300, alt. -0x5f00.
// Computed values for reference:
// 128K: main 0x1fd00 alt. 0x1a100.
// 256K: main 0x3fd00, alt. 0x3a100.
// 512K: main 0x7fd00, alt. 0x7a100.
// 1MB: main 0xffd00, alt. 0xfa100.
// 2MB: main 0x1ffd00, alt. 0x1fa100.
// 2.5MB: main 0x27fd00, alt. 0x27a100.
// 4MB: main 0x3ffd00, alt. 0x3fa100.
// Table of sound and disk speed memory base addresses.
`define snddsk_main_addr_128k 24'h1fd00
`define snddsk_alt_addr_128k 24'h1a100
`define snddsk_main_addr_256k 24'h3fd00
`define snddsk_alt_addr_256k 24'h3a100
`define snddsk_main_addr_512k 24'h7fd00
`define snddsk_alt_addr_512k 24'h7a100
`define snddsk_main_addr_1m 24'hffd00
`define snddsk_alt_addr_1m 24'hfa100
`define snddsk_main_addr_2m 24'h1ffd00
`define snddsk_alt_addr_2m 24'h1fa100
`define snddsk_main_addr_2_5m 24'h27fd00
`define snddsk_alt_addr_2_5m 24'h27a100
`define snddsk_main_addr_4m 24'h3ffd00
`define snddsk_alt_addr_4m 24'h3fa100
/* Top-level module for the BBU.
Undocumented signal but assumed to exist:
64KRAM: Here we assign it to pin 21 for our own experimentation, a
pin that is specified as just tied to ground.
*/
module bbu_master_ctrl
// Essential sequential logic RESET and clock signals
(n_res, c16m, c8m, c3_7m, c2m,
// RAM configuration pins
row2, mbram, s64kram,
// MC68000 signals
a9, a17, a19, a20, a21, a22, a23,
r_n_w, n_as, n_uds, n_lds, n_dtack,
n_ipl0, n_ipl1, n_berr,
n_vpa,
// DRAM signals
ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra8, ra7, ra9,
n_cas1l, n_cas0l, ram_r_n_w, n_ras, n_cas1h, n_cas0h,
rdq0, rdq1, rdq2, rdq3, rdq4, rdq5, rdq6, rdq7,
rdq8, rdq9, rdq10, rdq11, rdq12, rdq13, rdq14, rdq15,
n_en245, n_pmcyc,
// ROM and memory overlay signals
n_romen,
// VIA signals
via_cs1,
n_viairq,
// Video signals
vidpg2, vidout, n_hsync, n_vsync,
// Sound and disk speed signals
sndres, snd, pwm,
// IWM signals
n_iwm,
// SCC signals
n_sccen, n_sccrd, n_iow,
// SCSI signals
n_scsi, scsidrq, n_dack,
// PDS signals
n_extdtk, n_earen,
);
// The Macintosh SE deprecated SNDPG2. Nevertheless... for the
// sake of possibly making quasi-hardware replicas of earlier
// Macintosh computers easier, we will preserve an implementation
// here anyways.
// Essential sequential logic RESET and clock signals
input wire n_res; // *RESET signal
input wire c16m; // 15.667200 MHz master clock input
output wire c8m; // 7.8336 MHz clock output
output wire c3_7m; // 3.672 MHz clock output
output wire c2m; // 1.9584 MHz clock output
// RAM configuration pins
input wire row2; // 1/2 rows of RAM SIMMs jumper
input wire mbram; // 256K/1MB RAM SIMMs jumper
input wire s64kram; // DOUBLY UNDOCUMENTED 64K RAM SIMMs jumper
// MC68000 signals
input wire a9, a17, a19, a20, a21, a22, a23;
input wire r_n_w, n_as, n_uds, n_lds;
`output_wz wire n_dtack;
output wire n_ipl0;
input wire n_ipl1;
output wire n_berr;
input wire n_vpa;
// DRAM signals
inout wire ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra8;
output wire ra7, ra9;
output wire n_cas1l, n_cas0l, ram_r_n_w, n_ras, n_cas1h, n_cas0h;
inout wire rdq0, rdq1, rdq2, rdq3, rdq4, rdq5, rdq6, rdq7,
rdq8, rdq9, rdq10, rdq11, rdq12, rdq13, rdq14, rdq15;
output wire n_en245, n_pmcyc;
// ROM and memory overlay signals
output wire n_romen;
// VIA signals
output wire via_cs1;
input wire n_viairq;
// Video signals
input wire vidpg2; // VIDPG2 signal
output wire vidout; // VIDOUT signal
output wire n_hsync; // *HSYNC signal
output wire n_vsync; // *VSYNC signal
// Sound and disk speed signals
input wire sndres;
output wire snd, pwm;
// IWM signals
output wire n_iwm;
// SCC signals
output wire n_sccen, n_sccrd, n_iow;
// SCSI signals
output wire n_scsi;
input wire scsidrq;
output reg n_dack;
// PDS signals
input wire n_extdtk;
output reg n_earen; // ??? Purpose unknown.
// Note tristate inout ... 'bz for high impedance. 8'bz for wide.
// Full DRAM address bus snooping? I almost thought this was
// required to implement some functions, but it turns out it isn't,
// partial address bus snooping is good enough. Nevertheless, I'll
// preserve the implementation as it could be useful for BBU mods.
reg [7:0] row_snoop; reg [7:0] col_snoop;
// Installed RAM size.
wire [23:0] ramsz;
// SCSI support: Handle chip select, and handle DMA. Important!
// Never have *DACK and *SCSI active simultaneously. Okay, so
// let's get this straight. When we would normally assert *DTACK,
// we release *SCSI and wait for SCSI.DRQ. Then when we receive
// it, we can assert *DTACK and also *DACK, with a timer to
// deassert *DACK. Important! Make sure we do not go faster than
// the minimum read/write pulse width of the SCSI chip.
// Important! How to handle SCSI DMA... according to Guide to the
// Macintosh family hardware, page 126, this is "pseudo-DMA" mode.
// The BBU does not assert `*DTACK` until `SCSIDRQ` is received to
// indicate the DMA transfer is complete. And again, noting from
// page 126, . Likewise, `*DTACK` is asserted for all addresses in
// range, even if nothing is mapped. No stringent bus error
// control here, that's a hobbyist extension. And again, inded
// `*DTACK` is tri-stated according to the manual when `*EXTDTK`
// (`*EXT.DTACK`) is asserted.
// TODO: How do we know if DMA mode is true? Do we have to snoop
// the address but to get this information.
// Now, here's the golden rule: "If any access has not terminated
// within 265 ms, the BBU asserts the bus error signal /BERR."
// There you go, that's how it is driven, though the condition only
// happens for PDS and SCSI accesses. So, another thing,
// yes... there must be at least a nominal delay to assert `*DTACK`
// to allow PDS cards to intervene by asserting `*EXTDTK` first.
// TODO MOVE DOCUMENTATION: PLEASE NOTE, PDS cards can also access
// DRAM, not just the CPU. This is mainly a matter of bus
// arbitration, then as far s the BBU is concerned, PDS access to
// DRAM should appear identical to CPU access to DRAM. Guide to
// the Macintosh Family hardware, page 84.
//////////////////////////////////////////////////
// Pure combinatorial logic is defined first.
// Assert `*IPL0` if we receive an interrupt signal from the VIA or
// SCSI. However, do not assert `*IPL0` if the SCC asserts
// `*IPL1`. Guide to the Macintosh family hardware, page 113.
// SCSI interrupts are signaled only on IRQ from the SCSI
// controller. DRQ is not attached to MC68000 interrupt lines
// whatsoever, it must be polled by software and is only used by
// the BBU as specified in the other section. TODO CONFIRM: The
// SCSI IRQ line attaches directly to `*IPL0`?
assign n_ipl0 = ~n_ipl1 | n_viairq;
//////////////////////////////////////////////////
// Sub-modules are instantiated here.
// The remainder of definitions are for sequential logic.
always @(negedge n_res) begin
// Initialize all output registers on RESET.
n_dack <= 1;
n_earen <= 1;
end
always @(posedge c16m) begin
if (n_res) begin
// All high speed sequential logic goes here.
end
end
always @(posedge c8m) begin
if (n_res) begin
// All CPU speed sequential logic goes here.
end
end
always @(posedge c3_7m) begin
if (n_res) begin
// All peripheral speed sequential logic goes here.
end
end
always @(posedge c2m) begin
if (n_res) begin
// Only DRAM operations go here.
end
end
always @(negedge c2m) begin
if (n_res) begin
// Only DRAM operations go here.
end
end
endmodule
/*
DRAM refresh? See if we can do this during horizontal trace I
guess.
Important notes, DRAM initialization, you must do at least 8 cycles of
RAS refresh or CAS before RAS refresh before the DRAM is ready to use.
Macintosh Plus and newer DRAM speed is rated at a maximum access time
of 150 ns. So, the 2 MHz clock is quite appropriate for row and
column access strobes. Finally, a note on timing. The important
thing is to make sure there is sufficient delay after asserting RAS
and CAS. You can just have the delay times uniform and the DRAM
readout is immediately accessible after. Oh, and write-enable?
Typically that is asserted before asserting CAS, but after asserting
RAS. However, as I see it, asserting earlier does no harm.
PLEASE NOTE: One memory access can occur on one cycle of the 2 MHz
clock, we use the falling and rising edges to time the emission of row
access strobe and column access strobe respectively. Since we fetch
16 bits at a time, this allows for fetching two bits per 16 MHz cycle.
Since we only need one screen bit per 16 MHz cycle, this means we only
consume 50% of the memory bus cycles during horizontal scan, the other
50% of cycles are free for CPU memory accesses. The important thing,
access cycles are assigned constantly, and the CPU is forced to wait
until its turn. Video access only happens on a constant index, there
is no dynamic schedule requesting.
But actually, PLEASE NOTE. Despite the labeling of the circuits, the
Macintosh SE actually uses a 75%/25% between the CPU and the vdieo
memory access due to its performance edge. Macintosh Plus and earlier
models used 50/50. Well, vague hint... that doesn't really make sense
to me, though, I'll just fo with 50/50 and leave that note in place.
Plase note that audio buffers are fetched at the end of horizontal
lines.
Write down all my questions thus far about the BBU:
* How does the BBU refresh the DRAM? Is this once at the end of
drawing a video frame? Unlike the Apple II, video frame
drawing doesn't automatically refresh the DRAM because it
doesn't access all rows of DRAM.
Is column access strobe required for DRAM refresh, or does using
only row access strobe work just fine? Another way of asking, do we
use CAS before RAS refresh? I'd say we don't make the assumption
for greater flexibility on installed memory options.
* What are the timing requirements for DRAM access? When does
write-enable need to be set to function properly?
* What is the default configuration of the data bus when the CPU
is not requesting access? I'm assuming it is switched to
high-impedance, i.e. all of ROM, RAM, and peripherals are
disabled and not accessing the bus.
* Is the BBU designed to use CBR refresh on the DRAM, or does it use
RAS refresh?
*/
// Clock divider module. Generate the frequency-divided clock
// signals.
module clock_div (n_res, c16m, c8m, c3_7m, c2m_e, n_pmcyc, pmcyc_pt);
input wire n_res;
input wire c16m;
output reg c8m;
output reg c3_7m;
// c2m is now controlled by the DRAM controller state machine.
// This is just an I/O argument placeholder. We still generate the
// signal internally, though.
input wire c2m_e;
output wire n_pmcyc;
// *PMCYC "pre-trigger": will the *PMCYC state be negated on the
// next cycle?
output wire pmcyc_pt;
// TODO FIXME: `*PMCYC` should not be a strict 1MHz clock, because
// during vertical blanking, all cycles (except for horizontal
// blanking sound cycles) are fair game for CPU use. PLEASE NOTE:
// According to Guide to the Macintosh family hardware, page 194,
// the process of scanning the screen buffer also refreshes the
// DRAM. But I don't quite understand how this works, wouldn't you
// need to access more addresses to refresh all the DRAM? But,
// PLEASE NOTE. Macintosh SE/30 takes one access cycle every
// 15.6us for DRAM refresh.
// So, what's the secret sauce of the Macintosh SE being more
// performant in memory access? Guide to the Macintosh family
// hardware, page 401. During the BBU memory access cycle time,
// unlike earlier models that would only read one word, the BBU
// reads two 16-bit words. Yes, so it does do buffering! This
// allows the CPU to have free access to the next two cycles. So,
// the word is hard and strong now, `*PMCYC` is not a simple 1MHz
// clock, but has a much more complex timing circuit. That equates
// to a 200% memory access speedup during screen scanning in the
// Macintosh SE compared to the Macintosh Plus.
/* Inside Macintosh claims that the serial clock is 3.672 MHz.
Clock multiplication (via PLL) and division can be used to
generate this from the 15.6672 MHz clock as follows:
15.6672 / 3.6720 = 9792/2295 = (51*2^6*3)/(51*3^2*5)
= (2^6)/(3*5) = 64/15
This would entail a PLL clock running at 235.008 MHz inside the
BBU, which was impractical for the technology available during
the 1980s. But if that were configured, a simple divide-by-64
frequency counter would yield a perfect clock signal.
As it turns out, the actual Macintosh did not use a true,
constant-period 15.6672 MHz clock, but rather a 3.686 MHz clock
with a phase/period error of up to 1 clock cycle of the 15.6672
MHz clock. Sequential logic is used to effect a principal
divide-by-four clock cycle format, and at every fourth 3.686 MHz
clock cycle, one extra 15.6672 MHz clock cycle is slipped in on
the last low-edge half-period of the 3.686 MHz clock. Over a
long period of time, this effects an average frequency division
factor of 4.25.
And yes, even with that introduced phase/period error, the
downstream hardware apparently still works just fine, thanks to
the divide-by-16 in front of the SCC's internal baud rate
generator. This gives you a max baud of 230.4 kbits/sec, with a
phase/period error of 1/(16*16) = 0.39%. This is the same baud
as AppleTalk. */
// TODO EVALUATE: Optimize this to minimize the number of register
// bits required, while still preserving ideal frequency division
// and synchronization behavior. Maybe not... less registers
// entails more combinatorial logic delay.
// We use shift registers or 1-bit inverters for high performance,
// minimal cycle overhead.
reg c16m_div4_cntr; // C16M / 4 counter
// Complex C16M -> C3_7M divider counter, principal divide-by-4
reg c16m_div4_0_cntr;
// Complex C16M -> C3_7M divider counter, counter for slipping in
// extra cycle
reg [16:0] c16m_div4_25_cntr;
reg [3:0] c16m_div8_cntr; // C16M / 8 counter
reg [7:0] c16m_div16_cntr; // C16M / 16 counter
reg c4m;
reg c2m;
reg c1m;
assign pmcyc_pt = c16m_div16_cntr[7];
// assign c2m_e = c2m;
assign n_pmcyc = c1m;
always @(negedge n_res) begin
// Initialize all output registers on RESET.
c8m <= 0;
c4m <= 0;
c3_7m <= 0;
c2m <= 0;
c1m <= 0;
// Initialize all internal registers on RESET.
c16m_div4_cntr <= 0;
c16m_div4_0_cntr <= 0;
c16m_div4_25_cntr <= 1;
c16m_div8_cntr <= 1;
c16m_div16_cntr <= 1;
end
always @(posedge c16m) begin
if (n_res) begin
c8m <= ~c8m;
if (c16m_div4_cntr) c4m <= ~c4m;
c16m_div4_cntr <= ~c16m_div4_cntr;
if (~c16m_div4_25_cntr[16]) begin
if (c16m_div4_0_cntr) c3_7m <= ~c3_7m;
c16m_div4_0_cntr <= ~c16m_div4_0_cntr;
end
// else Slip in the extra cycle by not incrementing the
// principal divide-by-4 counter.
c16m_div4_25_cntr <= { c16m_div4_25_cntr[15:0],
c16m_div4_25_cntr[16] };
if (c16m_div8_cntr[3]) c2m <= ~c2m;
c16m_div8_cntr <= { c16m_div8_cntr[2:0], c16m_div8_cntr[3] };
if (c16m_div16_cntr[7]) c1m <= ~c1m;
c16m_div16_cntr <= { c16m_div16_cntr[6:0], c16m_div16_cntr[7] };
end
end
endmodule
// RAM configuration options module. Process the RAM configuration
// jumpers and generate the corresponding internal RAM configuration
// and address map signals.
module ram_config (row2, mbram, s64kram,
ramsz, ramsz_en, vid_main_addr, vid_alt_addr,
snddsk_main_addr, snddsk_alt_addr);
// RAM configuration pins
input wire row2; // 1/2 rows of RAM SIMMs jumper
input wire mbram; // 256K/1MB RAM SIMMs jumper
input wire s64kram; // DOUBLY UNDOCUMENTED 64K RAM SIMMs jumper
// Installed RAM size.
output wire [23:0] ramsz;
// Symbolic enumerant for installed RAM size. We use this in
// "shift register" fashion to keep downstream logic gates simple.
output wire [6:0] ramsz_en;
// Address of main video buffer
output wire [23:0] vid_main_addr;
// Address of alternate video buffer
output wire [23:0] vid_alt_addr;
// Address of main sound/disk buffer
output wire [23:0] snddsk_main_addr;
// Address of alternate sound/disk buffer
output wire [23:0] snddsk_alt_addr;
// TODO FIXME: We need a way to detect the 2.5MB RAM configuration
// and set the memory addresses accordingly. The BBU could do its
// own memory-test in this configuration to set a bit indicating
// that there is 2.5MB of RAM installed rather than 4MB.
assign ramsz
= (s64kram) ? // 64K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`ramsz_128k
: // 2 rows of RAM SIMMs
`ramsz_256k
: (~mbram) ? // 256K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`ramsz_512k
: // 2 rows of RAM SIMMs
`ramsz_1m
: // 1MB RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`ramsz_2m
: // 2 rows of RAM SIMMs
`ramsz_4m
;
assign ramsz_en
= (s64kram) ? // 64K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`RAMSZ_EN_128K
: // 2 rows of RAM SIMMs
`RAMSZ_EN_256K
: (~mbram) ? // 256K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`RAMSZ_EN_512K
: // 2 rows of RAM SIMMs
`RAMSZ_EN_1M
: // 1MB RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`RAMSZ_EN_2M
: // 2 rows of RAM SIMMs
`RAMSZ_EN_4M
;
assign vid_main_addr
= (s64kram) ? // 64K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`vid_main_addr_128k
: // 2 rows of RAM SIMMs
`vid_main_addr_256k
: (~mbram) ? // 256K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`vid_main_addr_512k
: // 2 rows of RAM SIMMs
`vid_main_addr_1m
: // 1MB RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`vid_main_addr_2m
: // 2 rows of RAM SIMMs
`vid_main_addr_4m
;
assign vid_alt_addr
= (s64kram) ? // 64K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`vid_alt_addr_128k
: // 2 rows of RAM SIMMs
`vid_alt_addr_256k
: (~mbram) ? // 256K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`vid_alt_addr_512k
: // 2 rows of RAM SIMMs
`vid_alt_addr_1m
: // 1MB RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`vid_alt_addr_2m
: // 2 rows of RAM SIMMs
`vid_alt_addr_4m
;
assign snddsk_main_addr
= (s64kram) ? // 64K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`snddsk_main_addr_128k
: // 2 rows of RAM SIMMs
`snddsk_main_addr_256k
: (~mbram) ? // 256K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`snddsk_main_addr_512k
: // 2 rows of RAM SIMMs
`snddsk_main_addr_1m
: // 1MB RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`snddsk_main_addr_2m
: // 2 rows of RAM SIMMs
`snddsk_main_addr_4m
;
assign snddsk_alt_addr
= (s64kram) ? // 64K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`snddsk_alt_addr_128k
: // 2 rows of RAM SIMMs
`snddsk_alt_addr_256k
: (~mbram) ? // 256K RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`snddsk_alt_addr_512k
: // 2 rows of RAM SIMMs
`snddsk_alt_addr_1m
: // 1MB RAM SIMMs
(~row2) ? // 1 row of RAM SIMMs
`snddsk_alt_addr_2m
: // 2 rows of RAM SIMMs
`snddsk_alt_addr_4m
;
endmodule
/* Check the high address bits and boot-time memory overlay to
determine which zone an address access is within and set the RAM,
ROM, or device enable signals accordingly. If an address is in an
invalid range, a bus error can optionally be signaled. N.B.: Note
that the *RAMEN signal is only used internally for the sake of more
modular implementation.
These are the particular address zones for Macintosh SE, according
to MESS/MAME source code. In particular, SCC and IWM are
surrounded with invalid address guard zones:
* 0x000000 - 0x3fffff: RAM/ROM (switches based on overlay)
* 0x400000 - 0x4fffff: ROM
* 0x580000 - 0x5fffff: 5380 NCR/Symbios SCSI peripherals chip
* 0x600000 - 0x7fffff: RAM, boot-time overlay only
* 0x900000 - 0x9fffff: Zilog 8530 SCC (Serial Control Chip) Read
* 0xb00000 - 0xbfffff: Zilog 8530 SCC (Serial Control Chip) Write
* 0xd00000 - 0xdfffff: IWM (Integrated Woz Machine; floppy)
* 0xe80000 - 0xefffff: Rockwell 6522 VIA
* 0xf00000 - 0xffffef: ??? (the ROM appears to be accessing here)
* 0xfffff0 - 0xffffff: Auto Vector
TODO FIXME: Note that SCSI chip enable is NOT asserted when A9 is
one, and Macintosh Plus asserts DACK when A9 is one, but not when
it is zero. Okay, so I think I have that figured out. Add 512 for
DMA mode access logic, otherwise we do not implement DMA access
mode at all.
This address map has also been confirmed with Guide to the
Macintosh family hardware, page 127. PLEASE NOTE: In SCC Read
zone, if A0 == 1, then that is an SCC RESET. IWM must be A0 == 1,
VIA must be A0 == 0, SCC write must be A0 == 1. SCSI read A0 == 0,
SCSI write A0 == 1.
SCC access notes: In the Macintosh Plus and older, even byte
accesses are a read, odd byte accesses are a write. Namely: `*LDS`
== 0 == write, `*UDS` == 0 == read. Remember, it's big endian.
What about the separate address regions? Well, I say just ignore
those, it's there for a convenient convention, but it's not the
officially documented hardware protocol.
In the Macintosh SE, this behavior is somewhat changed. Now `*IOW`
controls both `*SCSI.IOW` and `*SCC.WR`, and `*SCCRD` is wired
directly from the BBU to `*SCC.RD`. `*UDS` is used to trigger
`*SCSI.IOR`.
PLEASE NOTE: Guide to the Macintosh family hardware, page 121.
Rather than signaling bus errors for out of range RAM addresses,
overflow accesses should just wrap around and repeat access to the
same RAM.
PLEASE NOTE: Guide to the Macintosh family hardware, page 122. "A
word-wide access to any SCC address causes a phase shift in the
processor clock, and is used by the operating system to correct the
phase when necessary."
"At system startup, the operating system reads an address in the
range $F0 0000 through $F7 FFFF (labeled _Phase read_ in Gifgures
3-1 and 3-2) to determine whether the computer's high-frequency
timing signals are correctly in phase. When the timing signals are
not in phase, RAM accesses are not timed correctly, causing an
unstable video display, RAM errors, and VIA errors." Well, I can
see how that would be happening with just a bunch of PALs, but I
don't think it still needs to be that way when you have the BBU in
charge, you can do better! And indeed, that note only appears to
apply to the Macintosh Plus, not the Macintosh SE, as it is listed
in that section.
But, for the sake of Macintosh Plus recreation, please note. The
TSG PAL places one of the high-frequency phase indicator signals on
D0 of the address bus, I assume it is the 1 MHz *PMCYC signal. A
multiple address read instruction is used to read three consecutive
data values from the address bus in synchronous I/O mode (due to
using address 0xf00000), so each address read is either 10 or 20
CPU cycles long. This will sample the phase at a few different
points. The phase readings are added together, if they are zero or
one, then we are "in-phase." Otherwise, phase readings 2 and 3 are
considered "out-of-phase" so we access a word-width address in the
SCC range to shift the high frequency timing by 128 ns (one CPU
clock cycle at 8 MHz).
The important thing to remember is that every MC68000 instruction
executes for an even number of clock cycles (divisible by 2), and
there is no pipelining in these early CPUs.
TODO FIXME: Guide to the Macintosh family hardware, page 127.
Okay, so this is how to interpret the information about the
boot-time overlay for the alternate RAM location. Only a 2MB zone
is exposed, even though you may have up to 4MB of RAM. So, 2.5MB
and 4MB RAM configurations need to be treated specially. In
particular, only the "upper row" of RAM is accessible greater than
or equal to the address 0x680000. Below that address, you get
access to the first 512K of RAM. Macintosh Plus actually uses the
same overlay map too. That's a defect in MESS/MAME source code but
apparently it's not important, interestingly enough. Okay, I guess
I don't really quite understand, though, sorry. Okay, this means,
the first row, right? "If 2.5 or 4 MB only upper row is
accessible" page 127.
The signal *VPA is asserted in address range 0xe00000 - 0xffffff,
optionally excluding invalid addresses when a bus error signal is
generated. This is for synchronous I/O devices accessed in the old
6800 fashion.
*/
module decode_devaddr (n_res, clk, n_ramen, n_romen, n_scsi,
n_sccen, n_sccrd, n_iow, n_iwm, via_cs1, n_vpa,
n_berr, n_as, a23_19, berr_ram, n_extdtk,
boot_overlay, r_n_w, reg_romen, reg_ram_w,
n_dtack_peri);
input wire n_res;
input wire clk;
output wire n_ramen;
output wire n_romen;
output wire n_scsi;
output wire n_sccen;
output wire n_sccrd;
output wire n_iow;
output wire n_iwm;
output wire via_cs1;
output wire n_vpa;
output wire n_berr;
input wire n_as;
input wire [4:0] a23_19;
input wire berr_ram; // Would this RAM address be a bus error?
input wire n_extdtk;
input wire boot_overlay;
input wire r_n_w;
// Have we attempted to write to the regular RAM address zone?
output wire reg_ram_w;
// Has an address access to the regular *ROMEN zone occurred? This
// signal is used to disable the boot-time memory overlay.
output wire reg_romen;
output wire n_dtack_peri; // *DTACK for peripherals
wire reg_ram, reg_ram_r;
wire scsi, sccrd, sccwr;
wire berr_scc;
wire n_dtack_peri_pt; // *DTACK peripherals "pre-trigger"
reg n_dtack_peri_bf; // *DTACK for peripherals buffer
// If the boot-time overlay is enabled but we attempt to write to
// the regular RAM region, then this is a *RAMEN trigger. The
// overlay control logic will zero the switch on the next cycle,
// but we use combinatorial logic here to act immediately.
assign reg_ram = ~n_as & (a23_19[4:3] == 2'b00);
assign reg_ram_r = (r_n_w & reg_ram);
assign reg_ram_w = (~r_n_w & reg_ram);
assign n_ramen = ~((boot_overlay) ?
((~n_as & ((a23_19[4:1] == 4'h6) |
(a23_19[4:1] == 4'h7))) | reg_ram_w) :
reg_ram);
assign reg_romen = ~n_as & (a23_19[4:1] == 4'h4);
// Only trigger *ROMEN for reads, not writes, in overlay zone.
assign n_romen = ~(reg_romen | (boot_overlay & reg_ram_r));
assign scsi = ~n_as & (a23_19[4:0] == 5'b01011);
assign n_scsi = ~scsi;
assign sccrd = ~n_as & (a23_19[4:1] == 4'h9);
assign sccwr = ~n_as & (a23_19[4:1] == 4'hb);
assign n_sccen = ~(sccrd | sccwr);
assign n_sccrd = ~sccrd;
assign n_iow = ~((scsi & ~r_n_w) | sccwr);
assign n_iwm = ~(~n_as & (a23_19[4:1] == 4'hd));
assign via_cs1 = ~n_as & (a23_19[4:0] == 5'b11101);
assign n_vpa = ~(via_cs1 | (~n_as & (a23_19[4:1] == 4'hf)));
// N.B.: According to Guide to the Macintosh family hardware, page
// 126, the implementation of Auto Vector is easy and
// straightforward for the BBU. Just assert `*VPA` in the address
// range. The CPU sets address lines A3 - A1, and this causes the
// CPU to automatically jump to the memory location containing the
// interrupt handler.
// Optionally trigger bus errors if a read is attempted from the
// write-only SCC address space, and vice versa.
assign berr_scc = (sccrd & ~r_n_w) | (sccwr & r_n_w);
// Note that if the PDS card asserts *EXTDTK, we also must not
// drive *BERR. TODO EVALUATE: Should we wait a cycle before
// asserting *BERR to give the PDS card time to respond first?
assign n_berr = n_as | ~n_extdtk |
~((~n_ramen & berr_ram) |
berr_scc |
(a23_19[4:0] == 5'b01010) |
(a23_19[4:1] == 4'h7) |
(a23_19[4:1] == 4'h8) |
(a23_19[4:1] == 4'ha) |
(a23_19[4:1] == 4'hc) |
(a23_19[4:0] == 5'b11100));
// TODO: Also flag bus errors for the final address zone.
// NOTE: For all peripherals, we must set `*DTACK` from the BBU
// upon successful access condition and time durations because it
// is not set by the device itself. From the time *AS is asserted,
// we simply wait one clock cycle on whatever clock is given to us
// before we trigger *DTACK for the peripheral.
// N.B.: According to Guide to the Macintosh family hardware,
// `*DTACK` is not used to respond to addresses in the range
// 0xe00000 - 0xffffff, only below that. `*VPA` alone is used to
// respond to these addresses. So therefore, we exclude `VIA.CS1`.
assign n_dtack_peri_pt = n_scsi & n_sccen & n_iwm;
// N.B. We use combinatorial logic here to deassert *DTACK for
// peripherals as soon as *AS is released.
assign n_dtack_peri = n_as | n_dtack_peri_bf;
always @(negedge n_res) begin
n_dtack_peri_bf <= 1;
end
always @(posedge clk) begin
if (n_res) begin
if (n_as)
n_dtack_peri_bf <= 1;
else begin
if (n_dtack_peri_pt)
n_dtack_peri_bf <= 0;
else
n_dtack_peri_bf <= 1;
end
end
else
; // Nothing to be done during RESET.
end
endmodule
// Determine if a RAM address is out of range and should therefore
// signal a bus error.
module berr_ram_logic (a0_21, ramsz, /*ramsz_en,*/ berr_ram);
input wire [21:0] a0_21;
input wire [23:0] ramsz;
// input wire [6:0] ramsz_en;
output wire berr_ram;
// We could either use arithmetic comparison (easiest to code in
// Verilog), or bit-wise comparisons (possibly more efficient in
// hardware). We may simply consider implementing this logic in a
// separate module.
// 4MB valid: 0x000000 - 0x3fffff
// 4MB invalid: None!
// 2.5MB valid: 0x000000 - 0x27ffff
// 2.5MB invalid: 0x280000 - 0x3fffff
// 2MB valid: 0x000000 - 0x1fffff
// 2MB invalid: 0x200000 - 0x3fffff
// 1MB valid: 0x000000 - 0x0fffff
// 1MB invalid: 0x100000 - 0x3fffff
// 512K valid: 0x000000 - 0x07ffff
// 512K invalid: 0x080000 - 0x3fffff
// 256K valid: 0x000000 - 0x03ffff
// 256K invalid: 0x040000 - 0x3fffff
// 128K valid: 0x000000 - 0x01ffff
// 128K invalid: 0x020000 - 0x3fffff
// N.B. Verilog comparison is unsigned by default.
assign berr_ram = { 1'b0, a0_21[21:17] } < ramsz[22:17];
endmodule
// Boot-time memory overlay register and controlling logic. This is
// fairly straightforward to implement once you see all the other
// logic of the BBU in place.
module overlay_logic (n_res, clk, boot_overlay, reg_romen, reg_ram_w);
input wire n_res;
input wire clk;
// Boot-time memory overlay switch, 1 = enable, 0 = disable.
output reg boot_overlay;
input wire reg_romen;
input wire reg_ram_w;
always @(negedge n_res) begin
// Initialize the overlay switch to ENABLED on RESET.
boot_overlay <= 1;
end
always @(posedge clk) begin
if (n_res) begin
// Disable the overlay on the first access to the regular ROM
// address zone. And, according to MESS/MAME, also disable
// on the first attempt to write the regular RAM zone.
if (reg_romen | reg_ram_w)
boot_overlay <= 0;
else
; // Nothing to be done.
end
end
endmodule
// Column address strobe decode logic. Determine which column access
// strobe line to assert based off of the installed RAM, high-order
// CPU address lines, and *LDS/*UDS signals.
module dramctl_cas (n_cas, n_cas0h, n_cas0l, n_cas1h, n_cas1l,
n_uds, n_lds, row2, mbram, s64kram,
a17, a19, a21);
input wire n_cas;
output wire n_cas0l, n_cas0h, n_cas1l, n_cas1h;
input wire n_uds, n_lds;
input wire row2; // 1/2 rows of RAM SIMMs jumper
input wire mbram; // 256K/1MB RAM SIMMs jumper
input wire s64kram; // DOUBLY UNDOCUMENTED 64K RAM SIMMs jumper
input wire a17, a19, a21;
wire row1en; // Enable row 1? ("Second" row.)
assign row1en = (s64kram) ? a17 : (~mbram) ? a19 : a21;
assign n_cas0h = ~(~n_uds &
((row2) ? ~row1en : 1));
assign n_cas0l = ~(~n_lds &
((row2) ? ~row1en : 1));
assign n_cas1h = ~(~n_uds & row2 & row1en);
assign n_cas1l = ~(~n_lds & row2 & row1en);
endmodule
// RA7/RA9 selector logic. Determine which CPU address pins should be
// routed to these RAM address pins based off of the installed RAM.
module dramctl_ra7_9 (ra7, ra9, cas_n_ras, row2, mbram, s64kram,
a9, a17, a19, a20, a10);
output wire ra7;
output wire ra9;
input wire cas_n_ras; // CAS/*RAS
input wire row2; // 1/2 rows of RAM SIMMs jumper
input wire mbram; // 256K/1MB RAM SIMMs jumper
input wire s64kram; // DOUBLY UNDOCUMENTED 64K RAM SIMMs jumper
input wire a9, a17, a19, a20;
input wire a10; // Snooped from address bus
assign ra7
= (s64kram) ? // 64K RAM SIMMs
(~cas_n_ras) ? a9 : a10
: // 256K RAM SIMMs and 1MB RAM SIMMs
(~cas_n_ras) ? a17 : a9
;
assign ra9
= (mbram) ? // 1MB RAM SIMMs
(~cas_n_ras) ? a20 : a19
: // <1MB RAM SIMMs
0 // RA9 is not used
;
endmodule
// Module to decode a 21-bit address into RAM row and column address
// buffers. Just combinatorial logic, no registers. For ease of
// programming, least significant address bit zero is also included
// even though it is not used.
module decode_drcaddr (a, row_addr, col_addr, s64kram);
input wire [20:0] a;
output wire [9:0] row_addr;
output wire [9:0] col_addr;
input wire s64kram;
wire ra7r, ra7c, ra9r, ra9c;
assign ra7r
= (s64kram) ? // 64K RAM SIMMs
a[9]
: // >=256K RAM SIMMs
a[17]
;
assign ra7c
= (s64kram) ? // 64K RAM SIMMs
a[10]
: // >=256K RAM SIMMs
a[9]
;
assign ra9r = a[20];
assign ra9c = a[19];
assign row_addr = { ra9r, a[18], ra7r, a[16:11], a[1] };
assign col_addr = { ra9c, a[10], ra7c, a[8:2] };
endmodule
// Decode A0 into MC68000 upper and lower data strobes for a single
// 8-bit byte memory access. MC68000 is big endian, so *UDS is byte
// address zero, *LDS is byte address one.
module a0_to_ds (a0, n_uds, n_lds);
input wire a0;
output wire n_uds, n_lds;
assign n_uds = a0;
assign n_lds = ~a0;
endmodule
// TODO: So this is how the fastest CPU access state machine workss.
// *AS is started to be asserted on the rising edge of the CPU clock
// at S2, but it is not guaranteed clearly asserted until the falling
// edge of S2. Between this time, we must assert *DTACK before the
// falling edge of S4. So yes, we effectively have a maximum of two
// 16 MHz cycles to react, but better if we can get it done in 1.5
// cycles at 16 MHz. We must assert *DTACK before 10 ns of the end,
// so better to go 30 ns before the end, i.e. half of a 16 MHz clock
// cycle.
// So, point in hand, we need to design our logic to work very fast
// within these constraints. First, we have a cycle counter runs on
// the _falling edge_ of the clock. We only count one. Then, we have
// a "subcycle counter" which is really just combinatorial logic. We
// must only use combinatorial logic in order to be able to assert
// signals sub-cycle in a way that hopefully still works on an FPGA.
// If this really only works on an ASIC, though, our next best option
// is to use a 32 MHz PLL to satisfy the timing requirements. Also,
// it's important to understand, when using combinatorial logic to
// effectively double the clock frequency, understanding signal
// propagation delay is critical.
// Here, we assume the rising edge of C16M is synced to the rising
// edge of C8M, otherwise this won't work!
// There's good reason to be concerned about glitching when not using
// register-buffered outputs on a sequential clock. But, yeah,
// hardware-wise, this is the simplest way to do clock frequency
// doubling, and if it works like the 3.686 MHz clock with
// phase/period error, history be told. F.Y.I. Anecdotal evidence
// hints that the Macintosh Plus may have used this combinatorial
// logic hackery on clock signals in its PALs. But it was still
// driven by clock C16M and was a registered PAL? Maybe I better just
// look down its datasheet real good, check that it uses typical
// latching. PAL16R4.
// Okay, here's the deal. We can get this to work reasonably without
// glitching through the means of programmable slew rate limiting
// filter circuits built into the FPGA at the pin terminals. But that
// is the necessity if we go that path.
module dram_fast_test (c16m, c8m, n_res, n_as);
input wire c16m;
input wire c8m;
input wire n_res;
input wire n_as;
reg state_buf;
wire state0, state1, state2, state3;
// wire state4;
assign state0 = n_as;
assign state1 = ~n_as & ~state_buf & c16m & ~c8m;
assign state2 = ~n_as & ~state_buf & ~c16m & ~c8m;
assign state3 = ~n_as & state_buf & c16m & c8m;
// assign state4 = ~n_as & state_buf & ~c16m & c8m;
always @(negedge n_res) begin
state_buf <= 0;
end
always @(posedge c16m) begin
if (n_res) begin
if (~n_as)
state_buf <= 1;
else
state_buf <= 0;
end
end
endmodule
// Stateful advancement DRAM controller logic for CPU memory accesses.
module dramctl_cpu (n_res, clk, r_n_w, c2m,
ram_r_n_w, n_as, n_ras, n_cas,
n_en245, n_pmcyc, n_dtack,
ra, row_snoop, col_snoop, snoop_valid);
input wire n_res, clk, r_n_w;
output reg c2m;
input wire n_as;
output wire ram_r_n_w;
// Row Address Strobe (*RAS), Column Address Strobe (*CAS)
output reg n_ras, n_cas;
// *EN245 controls the bus switches to remove CPU access to RAM
// data output/input (RDQ) by placing those lanes in a
// high-impedance state.
output reg n_en245;
// *PMCYC principally enables the row/column address multiplexers.
// At a higher level, it is used to determine whether it is the
// CPU's turn to access RAM or the BBU's turn to access RAM. The
// CPU always takes a multiple of 4 clock cycles running at 8 MHz
// to access RAM. This signal could possibly be just wired up to a
// 1 MHz clock.
input wire n_pmcyc;
// output reg n_pmcyc;
output reg n_dtack;
input wire [9:0] ra; // RAM Address (RA)
// In order to implement the memory overlay switch, we must snoop
// the address bus. These are the registers we use to store the
// address multiplexer outputs.
// N.B. RA7 and RA9 are set by us, but for simplicity of downstream
// code, we capture them into the address snooping registers
// regardless.
output reg [9:0] row_snoop;
output reg [9:0] col_snoop;
output reg snoop_valid;
wire n_as_full;
// N.B. We use a shift-register style state buffer for speed and
// simplicity.
reg [5:0] drc_state_buf;
wire [5:0] drc_state;
// Internal "full" address strobe signal: only assert when both *AS
// and *PMCYC are asserted. Bit-wise OR by De Morgan's Theorem.
assign n_as_full = n_as | n_pmcyc;
// Use combinatorial logic to advance into state 4 under as quickly
// as possible when the respective conditions are met. We skip
// state 2 because it is redundant by virtue of the BBU
// implementation.
assign drc_state = drc_state_buf ^
(~n_pmcyc & drc_state_buf[0] & ~n_as_full) ? 5 : 0;
// Set RAM R/*W based off of the CPU output and simply checking
// that we are not in state 1.
assign ram_r_n_w = (drc_state[0]) ? 1 : r_n_w;
always @(negedge n_res) begin
// Initialize all output registers on RESET.
c2m <= 0;
n_ras <= 1;
n_cas <= 1;
n_en245 <= 1;
// n_pmcyc <= 1;
n_dtack <= 1;
row_snoop <= 0;
col_snoop <= 0;
snoop_valid <= 0;
// Initialize all internal registers on RESET.
drc_state_buf <= 1;
end
always @(posedge clk) begin
if (n_res) begin
// Check *AS before checking the case statements for a faster
// response to when *AS is released.
if (n_as_full) begin
if (~drc_state[0]) begin // State != 1
// Abort or finish the DRAM access when we get the
// release signal.
c2m <= 0;
n_ras <= 1;
n_cas <= 1;
// n_pmcyc <= 1;
n_en245 <= 1;
n_dtack <= 1;
drc_state_buf <= 1; // Finished.
end
else /* if (drc_state[0]) */
; // Nothing to be done.
end
else /* if (~n_as_full) */ begin
// N.B.: Using case statements might not generate the most
// efficient hardware because the generated hardware might
// be checking to ensure all the other bits are zero,
// which is not needed here. Hence a bunch of if
// statements.
// if (drc_state[0]) begin // State 1
// Initiate the DRAM access process in a serial
// mannner. This alternative approach to
// combinatorial logic up-front can be faster for
// very high-speed code, but it is not desired for
// Macintosh BBU cycle timing.
// drc_state_buf <= 1;
// end
// N.B.: State 2 is skipped because our master immediately
// triggers *PMCYC at the beginning of the CPU's turn to
// access memory.
// if (drc_state[1]) begin // State 2
// // Enable the row address multiplexers.
// n_pmcyc <= 0;
// // Trigger *EN245 as early as possible.
// n_en245 <= 0;
// // Invalidate the snoop status.
// snoop_valid <= 0;
// drc_state_buf <= drc_state << 1;
// end
if (drc_state[2]) begin // State 4
// Trigger *RAS.
n_ras <= 0;
// Trigger *EN245 as early as possible.
n_en245 <= 0;
// Snoop the row address.
row_snoop <= ra;
// Invalidate the snoop status.
snoop_valid <= 0;
drc_state_buf <= drc_state << 1;
end
if (drc_state[3]) begin // State 8
// Enable the column address multiplexers.
c2m <= 1;
drc_state_buf <= drc_state << 1;
end
// N.B.: If we wanted even faster DRAM controller response
// to CPU requests, then we could trigger *CAS and *DTACK
// in state 4 and use use combinatorial logic gate delays
// to ensure that the column access strobe does not reach
// the DRAM before the column address has stabilized. The
// CPU only checks *DTACK on select clock cycles, which is
// why it is okay for us to set it a little bit too early.
// However, please note tha column address snooping must
// still happen in state 16.
if (drc_state[4]) begin // State 16
// Trigger *CAS and signal the DRAM data is ready.
n_cas <= 0;
n_dtack <= 0;
// Snoop the column address.
col_snoop <= ra;
// Signal that we successfully snooped a full address
// from the bus.
snoop_valid <= 1;
drc_state_buf <= drc_state << 1;
end
// State 32: No state advancement until *AS is no longer
// triggered, then we execute the finish sequence.
// default: ; // Other states should never happen.
end
end
else
; // Nothing to be done during RESET.
end
endmodule
// Stateful advancement DRAM controller logic for BBU memory accesses.
// TODO: Try to see a single internal state register can be shared
// across these two stateful DRAM controller modules, because only one
// will ever be used at a time and run to completion.
module dramctl_bbu (n_res, clk, r_n_w,
ram_r_n_w, n_as, n_ras, n_cas,
n_pmcyc, n_dtack,
ra, row_addr, col_addr, bbu_dtack);
input wire n_res, clk, r_n_w;
input wire n_as;
output wire ram_r_n_w;
// Row Address Strobe (*RAS), Column Address Strobe (*CAS)
output reg n_ras, n_cas;
// *PMCYC principally enables the row/column address multiplexers.
// At a higher level, it is used to determine whether it is the
// CPU's turn to access RAM or the BBU's turn to access RAM. The
// CPU always takes a multiple of 4 clock cycles running at 8 MHz
// to access RAM. This signal could possibly be just wired up to a
// 1 MHz clock.
input wire n_pmcyc;
output wire n_dtack;
output reg [9:0] ra; // RAM Address (RA)
input wire [9:0] row_addr;
input wire [9:0] col_addr;
output wire bbu_dtack;
wire n_as_full;
// N.B. We use a shift-register style state buffer for speed and
// simplicity.
reg [6:0] drc_state_buf;
wire [6:0] drc_state;
// Internal "full" address strobe signal: only assert when *AS is
// asserted and *PMCYC is deasserted. Bit-wise OR by De Morgan's
// Theorem.
assign n_as_full = n_as | ~n_pmcyc;
// Use combinatorial logic to advance into state 2 under as quickly
// as possible when the respective conditions are met.
assign drc_state = drc_state_buf ^
(n_pmcyc & drc_state_buf[0] & ~n_as_full) ? 3 : 0;
// Set RAM R/*W based off of the BBU command and simply checking
// that we are not in state 1.
assign ram_r_n_w = (drc_state[0]) ? 1 : r_n_w;
// BBU internal DRAM accesses always hold the CPU *DTACK line high.
assign n_dtack = 1;
// The BBU internal DTACK is simply implemented by checking that we
// are in state 64.
assign bbu_dtack = drc_state[6];
always @(negedge n_res) begin
// Initialize all output registers on RESET.
ra <= 10'bz; // Set to high-impedance to disable output.
n_ras <= 1;
n_cas <= 1;
// Initialize all internal registers on RESET.
drc_state_buf <= 1;
end
always @(posedge clk) begin
if (n_res) begin
// Check *AS before checking the case statements for a faster
// response to when *AS is released.
if (n_as_full) begin
if (~drc_state[0]) begin // State != 1
// Abort or finish the DRAM access when we get the
// release signal.
ra <= 10'bz; // Set to high-impedance to disable output.
n_ras <= 1;
n_cas <= 1;
drc_state_buf <= 1; // Finished.
end
else /* if (drc_state[0]) */
; // Nothing to be done.
end
else /* if (~n_as_full) */ begin
// N.B.: Using case statements might not generate the most
// efficient hardware because the generated hardware might
// be checking to ensure all the other bits are zero,
// which is not needed here. Hence a bunch of if
// statements.
// if (drc_state[0]) begin // State 1
// Initiate the DRAM access process in a serial
// mannner. This alternative approach to
// combinatorial logic up-front can be faster for
// very high-speed code, but it is not desired for
// Macintosh BBU cycle timing.
// drc_state_buf <= 1;
// end
if (drc_state[1]) begin // State 2
// Enable the row address multiplexers.
ra <= row_addr;
drc_state_buf <= drc_state << 1;
end
if (drc_state[2]) begin // State 4
// Trigger *RAS.
n_ras <= 0;
drc_state_buf <= drc_state << 1;
end
if (drc_state[3]) begin // State 8
// Enable the column address multiplexers.
ra <= col_addr;
drc_state_buf <= drc_state << 1;
end
if (drc_state[4]) begin // State 16
// Trigger *CAS.
n_cas <= 0;
drc_state_buf <= drc_state << 1;
end
if (drc_state[5]) begin // State 32
// Signal the DRAM data is ready (via combinatorial
// logic). We need to make sure we wait the nominal
// number of cycles for the DRAM to be ready, unlike
// the case for CPU accesses where it will wait on its
// own due to the nature of its clock cycle alignment.
drc_state_buf <= drc_state << 1;
end
// State 64: No state advancement until *AS is no longer
// triggered, then we execute the finish sequence.
// default: ; // Other states should never happen.
end
end
else
; // Nothing to be done during RESET.
end
endmodule
// TODO: Module to fetch an address from DRAM and store it in the
// destination BBU internal register. This will be just a coding
// exercise because of Verilog silliness. Actually, might as well
// make two modules since that is all that is needed to start: one for
// video, one for DRAM.
module fetch_vid_addr (n_res, clk, n_as, a, vidreg, s64kram);
input wire n_res;
input wire clk;
input wire n_as;
input wire [20:0] a;
output reg [15:0] vidreg;
input wire s64kram;
wire [9:0] row_addr;
wire [9:0] col_addr;
decode_drcaddr u0 (a, row_addr, col_addr, s64kram);
// TODO: Schedule the memory access request and now wait until we
// are signaled ready. How? Well, I recommend using... a
// dedicated module for the scheduling logic. Well, first of all,
// we're running timers and clocks for all matters of RAM accesses.
// We know those are guaranteed not to conflict since everything is
// carefully required to take turns on known intervals. So,
// remember that, no queuing logic needed. The DRAM access request
// will be able to be fulfilled immediately. We just need to check
// the counters to know which register to send the result to.
// Okay, so point in hand is now obvious. We need to actually
// implement the video and audio/disk counters first before we can
// implement the DRAM access requests.
always @(negedge n_res) begin
vidreg <= 0;
end
always @(posedge clk) begin
if (n_res) begin
end
else
; // Nothing to be done during RESET.
end
endmodule
// TODO: Video and audio/disk timers. This is the core logic where we
// determine where we are on the screen, which buffer address to fetch
// next, and so on.
module avtimers ();
input wire n_res;
input wire c16m;
// Video signals
input wire vidpg2; // VIDPG2 signal
output reg vidout; // VIDOUT signal
output reg n_hsync; // *HSYNC signal
output reg n_vsync; // *VSYNC signal
// Sound and disk speed signals
input wire sndres;
output reg snd, pwm;
// C16M pixel clock (0.064 us per pixel).
// 512 horizontal draw pixels, 192 horizontal blanking pixels.
// 342 scan lines, 28 scan lines vertical blanking.
// 60.15 Hz vertical scan rate.
// (512 + 192) * (342 + 28) = 260480 pixel clock ticks per frame.
// Total screen buffer size = 10944 words. High-order bit of each
// 16-bit word is the leftmost pixel, low-order bit is the
// rightmost pixel. Words in ascending order move from left to
// right in the scan line, first scan line is topmost and then
// moves downward.
// *HSYNC and *VSYNC counters are negative during blanking.
reg [15:0] vidout_sreg; // VIDOUT shift register
reg [4:0] vidout_cntr; // VIDOUT remaining counter
reg [9:0] vid_hsync_cntr; // *HSYNC counter
reg [8:0] vid_vsync_cntr; // *VSYNC counter
wire [23:0] vid_main_addr; // Address of main video buffer
wire [23:0] vid_alt_addr; // Address of alternate video buffer
// Sound and disk speed buffers are scanned 370 words per video
// frame, and the size of both buffers together is 370 words. Or,
// 260480 pixel clock ticks / 370 = 704 pixel clock ticks per word.
// In a single scan line, (512 + 192) / 704 = 704 / 704 = exactly 1
// word is read. The sound byte is the most significant byte, the
// disk speed byte is the least significant byte. Both the sound
// sample and disk speed represent a PCM amplitude value, this is
// used to generate a PDM waveform that can be processed by a
// low-pass filter to generate the analog signal.
// Well, at least in concept... Inside Macintosh claims that only a
// single pulse is generated, so this is not quite your typical PDM
// audio circuit. Nevertheless, the sample rate is 22.2555 kHz, so
// it's not too bad overall for generating lo-fi audio. But, good
// point to ponder, this is an area of improvement where a
// different algorithm can generate better audio quality.
reg [15:0] snddsk_reg; // PCM sound sample and disk speed register
wire [23:0] snddsk_main_addr; // Address of main sound/disk buffer
wire [23:0] snddsk_alt_addr; // Address of alternate sound/disk buffer
// We must be careful that the sound circuitry does not attempt to
// access RAM at the same time as the video circuitry. Because the
// phases are coherent, we can simply align the sound and disk
// speed RAM fetch to be at a constant offset relative to the video
// RAM fetch.
// PLEASE NOTE: We must carefully time our RAM accesses since they
// have delays and we don't want the screen bits shift register
// buffer to run empty before we have the next word available from
// RAM. Our ideal is that the next word is available from RAM just
// as we are shifting out the last pixel, so that we can use a
// non-blocking assign and the new first pixel will be available
// right at the start of the next pixel clock cycle. Otherwise,
// less ideal but easier to program would be to use two 16-bit
// buffers as a FIFO.
// N.B. Sound generation. Since the original Macintosh used only
// simple counters and the registered ASG PAL for PWM generation,
// there is no way the more sophisticated PWM techniques could have
// been used. This is going to be a one-shot countdown timer for
// generating a single pulse per byte.
always @(negedge n_res) begin
// Initialize all output registers on RESET.
vidout <= 0; n_hsync <= 1; n_vsync <= 1;
snd <= 0; pwm <= 0;
// Initialize all internal registers on RESET.
vidout_sreg <= 0;
vidout_cntr <= 0;
vid_hsync_cntr <= 0;
vid_vsync_cntr <= 0;
snddsk_reg <= 0;
end
endmodule
/* TODO: Summary of what is missing and left to implement: DRAM
initialization pulses, DRAM refresh, detect 2.5MB of RAM and
configure address buffers accordingly, video, disk, and audio
scanout, SCSI DMA, EXTDTK yielding.
Okay, so the VERDICT on DRAM initialization pulses. We don't
actually use these as we should, strictly speaking, but why does it
still work? On power-on RESET, the first few CPU memory accesses
are all in ROM. Yet the BBU is still scanning the DRAM and
fetching words from it. These first few words will be garbage, but
it's okay because we're read-only. By the time the CPU makes its
first write to DRAM, all is well because it received a sufficient
number of *RAS initialization pulses.
So really, the only mysteries left now is 2.5MB RAM detection and
4MB RAM DRAM refresh. Then we need to do the busywork to implement
the PWM and video scanout modules and we're done! */
/*
Now I think I see why there is the funny thing going on with the
address multiplexers for RAS/CAS. It is a required modification to
use DRAM fast-page mode since RAS and CAS are still logically
"swapped" compared to a contiguous memory layout. This swapping of
RAS and CAS is used to get DRAM refresh for free when scanning the
video framebuffer.
Okay, so let's review in more detail.
Address multiplexer row address outputs:
A2, A3, A4, A5, A6, A7, A8, A10
A9 inputs directly to BBU, controls RA7.
This is a straight match-up to DRAM row address lines.
RA0 A2
RA1 A3
RA2 A4
RA3 A5
RA4 A6
RA5 A7
RA6 A8
RA7 A9
RA8 A10
RA9 A19 (optional) (!)
So, how many longwords for the video framebuffer?
512 x 342 / 32 = 5472 longwords
In hex: 0x1560
Number of address bits fully covered by a full scan: 12
Okay, so the question, does it work for DRAM refresh? Indeed it
does! Well, at least for <=1MB of RAM.
RA9 looks to be trouble. But, the Unitron reverse engineering docs
almost have a solution. Set this to A17 (?) and it should "just work"
I guess. But why?
*/
`endif // NOT BBU_V