sync vs async RAM

This commit is contained in:
Steven Hugg 2018-02-27 21:35:42 -06:00
parent b5c74234f3
commit 8f1563f88e
7 changed files with 253 additions and 102 deletions

View File

@ -1,8 +1,59 @@
`ifndef CPU16_H
`define CPU16_H
// include ALU module
`include "cpu8.v"
// ALU operations
// 4-bit opcodes matched against the `aluop` input in the ALU module's
// case statement. The result Y is one bit wider than the operands.

// 4'h0-4'h3: unary operations on A (OP_ZERO ignores A and yields 0;
// OP_LOAD_A passes A through; OP_INC / OP_DEC compute A+1 / A-1)
`define OP_ZERO 4'h0
`define OP_LOAD_A 4'h1
`define OP_INC 4'h2
`define OP_DEC 4'h3
// 4'h4-4'h7: shifts and rotates of A. The bit shifted out lands in the
// top bit of Y; OP_ROL / OP_ROR shift the `carry` input in, while
// OP_ASL / OP_LSR shift in a zero.
`define OP_ASL 4'h4
`define OP_LSR 4'h5
`define OP_ROL 4'h6
`define OP_ROR 4'h7
// 4'h8-4'hb: bitwise A|B, A&B, A^B and pass-through of B; the top bit
// of Y is forced to zero.
`define OP_OR 4'h8
`define OP_AND 4'h9
`define OP_XOR 4'ha
`define OP_LOAD_B 4'hb
// 4'hc-4'hf: arithmetic. OP_ADD / OP_SUB compute A+B / A-B;
// OP_ADC / OP_SBB additionally add / subtract the `carry` input.
`define OP_ADD 4'hc
`define OP_SUB 4'hd
`define OP_ADC 4'he
`define OP_SBB 4'hf
// Combinational N-bit ALU.
//
// Ports:
//   A, B   - N-bit operands
//   aluop  - 4-bit operation select (one of the `OP_* macros)
//   carry  - carry input, used by ROL, ROR, ADC and SBB
//   Y      - (N+1)-bit result; the extra top bit Y[N] holds the bit
//            shifted out, or the carry/borrow of an arithmetic op
module ALU(A, B, Y, aluop, carry);

  parameter N = 8; // operand width in bits

  input  [N-1:0] A;
  input  [N-1:0] B;
  // Y is assigned inside an always block, so it must be a variable
  // ("reg"); a plain "output" is a net and may not be assigned
  // procedurally.
  output reg [N:0] Y;
  input  [3:0] aluop;
  input  carry;

  // All 16 values of the 4-bit aluop are listed, so Y is assigned on
  // every path for any fully-defined aluop.
  always @(*)
    case (aluop)
      // unary operations
      `OP_ZERO:   Y = 0;
      `OP_LOAD_A: Y = {1'b0, A};
      `OP_INC:    Y = A + 1;
      `OP_DEC:    Y = A - 1;
      // unary operations that generate and/or use carry
      `OP_ASL:    Y = {A, 1'b0};                  // A[N-1] ends up in Y[N]
      `OP_LSR:    Y = {A[0], 1'b0, A[N-1:1]};     // A[0] ends up in Y[N]
      `OP_ROL:    Y = {A, carry};                 // carry enters at bit 0
      `OP_ROR:    Y = {A[0], carry, A[N-1:1]};    // carry enters at bit N-1
      // binary operations
      `OP_OR:     Y = {1'b0, A | B};
      `OP_AND:    Y = {1'b0, A & B};
      `OP_XOR:    Y = {1'b0, A ^ B};
      `OP_LOAD_B: Y = {1'b0, B};
      // binary operations that generate and/or use carry
      `OP_ADD:    Y = A + B;
      `OP_SUB:    Y = A - B;
      `OP_ADC:    Y = A + B + (carry?1:0);
      `OP_SBB:    Y = A - B - (carry?1:0);
    endcase

endmodule
/*
00000aaa 0++++bbb operation A+B->A
@ -30,6 +81,9 @@ module CPU16(clk, reset, hold, busy,
output [15:0] data_out;
output write;
// wait state for RAM?
parameter RAM_WAIT = 1;
reg [15:0] regs[0:7]; // 8 16-bit registers
reg [2:0] state; // CPU state
@ -51,6 +105,8 @@ module CPU16(clk, reset, hold, busy,
localparam S_SELECT = 1;
localparam S_DECODE = 2;
localparam S_COMPUTE = 3;
localparam S_DECODE_WAIT = 4;
localparam S_COMPUTE_WAIT = 5;
localparam SP = 6; // stack ptr = register 6
localparam IP = 7; // IP = register 7
@ -86,22 +142,22 @@ module CPU16(clk, reset, hold, busy,
busy <= 0;
address <= regs[IP];
regs[IP] <= regs[IP] + 1;
state <= S_DECODE;
state <= RAM_WAIT ? S_DECODE_WAIT : S_DECODE;
end
end
// state 2: read/decode opcode
S_DECODE: begin
// default next state
state <= RAM_WAIT && data_in[11] ? S_COMPUTE_WAIT : S_COMPUTE;
casez (data_in)
// 00000aaa0++++bbb operation A+B->A
16'b00000???0???????: begin
aluop <= data_in[6:3];
state <= S_COMPUTE;
end
// 00001aaa01+++bbb operation A+[B]->A
16'b00001???01??????: begin
address <= regs[data_in[2:0]];
aluop <= data_in[6:3];
state <= S_COMPUTE;
if (data_in[2:0] == SP)
regs[SP] <= regs[SP] + 1;
end
@ -110,18 +166,15 @@ module CPU16(clk, reset, hold, busy,
address <= regs[IP];
regs[IP] <= regs[IP] + 1;
aluop <= data_in[6:3];
state <= S_COMPUTE;
end
// 11+++aaa######## immediate binary operation
16'b11??????????????: begin
aluop <= data_in[14:11];
state <= S_COMPUTE;
end
// 00101aaa######## load ZP memory
16'b00101???????????: begin
address <= {8'b0, data_in[7:0]};
aluop <= `OP_LOAD_B;
state <= S_COMPUTE;
end
// 00110aaa######## store ZP memory
16'b00110???????????: begin
@ -134,7 +187,6 @@ module CPU16(clk, reset, hold, busy,
16'b01001???????????: begin
address <= regs[data_in[2:0]] + 16'($signed(data_in[7:3]));
aluop <= `OP_LOAD_B;
state <= S_COMPUTE;
if (data_in[2:0] == SP)
regs[SP] <= regs[SP] + 1;
end
@ -152,11 +204,10 @@ module CPU16(clk, reset, hold, busy,
address <= regs[IP];
regs[IP] <= regs[IP] + 1;
aluop <= data_in[6:3];
state <= S_COMPUTE;
end
// 01110aaa00cccbbb store A -> [B+#], C -> IP
// 01110aaa00cccbbb store A -> [B], C -> IP
16'b01110???00??????: begin
address <= regs[data_in[2:0]] + 16'($signed(data_in[7:3]));
address <= regs[data_in[2:0]];
data_out <= regs[data_in[10:8]];
write <= 1;
state <= S_SELECT;
@ -195,6 +246,13 @@ module CPU16(clk, reset, hold, busy,
// repeat CPU loop
state <= S_SELECT;
end
// wait 1 cycle for RAM read
S_DECODE_WAIT: begin
state <= S_DECODE;
end
S_COMPUTE_WAIT : begin
state <= S_COMPUTE;
end
endcase
end
@ -212,7 +270,8 @@ module test_CPU16_top(
output [15:0] IP,
output zero,
output carry,
output busy
output busy,
output [2:0] state
);
reg [15:0] ram[0:65535];
@ -221,6 +280,7 @@ module test_CPU16_top(
assign IP = cpu.regs[7];
assign zero = cpu.zero;
assign carry = cpu.carry;
assign state = cpu.state;
CPU16 cpu(
.clk(clk),
@ -237,11 +297,11 @@ module test_CPU16_top(
ram[address_bus] <= from_cpu;
end
always @(*)
always @(posedge clk)
if (address_bus[15] == 0)
to_cpu = ram[address_bus];
to_cpu <= ram[address_bus];
else
to_cpu = rom[address_bus[7:0]];
to_cpu <= rom[address_bus[7:0]];
`ifdef EXT_INLINE_ASM
initial begin
@ -250,13 +310,13 @@ module test_CPU16_top(
.arch femto16
.org 0x8000
.len 256
mov sp,@$6fff
mov dx,@Fib
jsr dx
reset
Fib:
mov ax,#1
mov bx,#0
mov sp,@$6fff
Loop:
mov cx,ax
add ax,bx

View File

@ -5,7 +5,6 @@
`include "sprite_scanline_renderer.v"
`include "lfsr.v"
`include "sound_generator.v"
`include "cpu8.v"
`include "cpu16.v"
module maze_game_top(clk, reset, hsync, vsync, rgb);
@ -19,14 +18,26 @@ module maze_game_top(clk, reset, hsync, vsync, rgb);
wire [8:0] vpos;
// video RAM bus
wire [7:0] vram_read;
reg [7:0] vram_write = 0;
reg vram_writeenable = 0;
wire [15:0] ram_read;
reg [15:0] ram_write = 0;
reg ram_writeenable = 0;
// multiplex sprite and tile RAM
wire sprite_ram_select = (vpos == 256);
reg [15:0] tile_ram_addr;
reg [6:0] sprite_ram_addr;
reg [5:0] sprite_ram_addr;
wire tile_reading;
wire sprite_reading;
wire [14:0] mux_ram_addr; // 15-bit RAM access
always @(*)
if (cpu_busy) begin
if (sprite_ram_select)
mux_ram_addr = {9'b1111111, sprite_ram_addr};
else
mux_ram_addr = tile_ram_addr[14:0];
end else
mux_ram_addr = cpu_ram_addr[14:0];
// tile and sprite ROM
wire [10:0] tile_rom_addr;
@ -48,15 +59,13 @@ module maze_game_top(clk, reset, hsync, vsync, rgb);
.vpos(vpos)
);
// video RAM (16k)
RAM #(14,8) vram(
// RAM (32k x 16 bits)
RAM_sync #(15,16) ram(
.clk(clk),
.dout(vram_read),
.din(vram_write),
.addr(sprite_ram_select
? {7'b0111111, sprite_ram_addr}
: tile_ram_addr[13:0]),
.we(vram_writeenable)
.dout(ram_read),
.din(ram_write),
.addr(mux_ram_addr),
.we(ram_writeenable)
);
tile_renderer tile_gen(
@ -66,7 +75,8 @@ module maze_game_top(clk, reset, hsync, vsync, rgb);
.vpos(vpos),
.display_on(display_on),
.ram_addr(tile_ram_addr),
.ram_read(vram_read),
.ram_read(ram_read),
.ram_busy(tile_reading),
.rom_addr(tile_rom_addr),
.rom_data(tile_rom_data),
.rgb(tile_rgb)
@ -78,7 +88,8 @@ module maze_game_top(clk, reset, hsync, vsync, rgb);
.hpos(hpos),
.vpos(vpos),
.ram_addr(sprite_ram_addr),
.ram_data(vram_read),
.ram_data(ram_read),
.ram_busy(sprite_reading),
.rom_addr(sprite_rom_addr),
.rom_data(sprite_rom_data),
.rgb(sprite_rgb)
@ -93,34 +104,74 @@ module maze_game_top(clk, reset, hsync, vsync, rgb);
.addr(sprite_rom_addr),
.data(sprite_rom_data)
);
// sprites overlay tiles
assign rgb = display_on
? (sprite_rgb>0 ? sprite_rgb : tile_rgb)
: 0;
// CPU RAM (32k x 16 bits)
RAM #(15,16) mram(
.clk(clk),
.dout(cpuram_read),
.din(cpuram_write),
.addr(cpuram_addr[14:0]),
.we(cpuram_writeenable)
);
reg [15:0] cpuram_read;
reg [15:0] cpuram_write;
reg [15:0] cpuram_addr;
reg cpuram_writeenable;
// CPU
reg cpu_hold = 0;
wire cpu_busy;
wire [15:0] cpu_ram_addr;
wire busy;
wire [15:0] cpu_bus;
assign cpu_bus = cpu_ram_addr[15]
? rom[cpu_ram_addr[9:0]]
: ram_read;
CPU16 cpu(
.clk(clk),
.reset(reset),
.hold(0),
.busy(busy),
.address(cpuram_addr),
.data_in(cpuram_read),
.data_out(cpuram_write),
.write(cpuram_writeenable));
.hold(tile_reading | sprite_reading),
.busy(cpu_busy),
.address(cpu_ram_addr),
.data_in(cpu_bus),
.data_out(ram_write),
.write(ram_writeenable));
reg [15:0] rom[0:1023];
`ifdef EXT_INLINE_ASM
initial begin
rom = '{
__asm
.arch femto16
.org 0x8000
.len 1024
mov sp,@$6fff
mov dx,@Init
jsr dx
mov ax,#0
mov dx,@Clear
jsr dx
reset
Init:
mov ax,@$6000 ; screen buffer
mov bx,@$7e00 ; page table start
mov cx,#32 ; 32 rows
InitLoop:
mov [bx],ax
mov [ax],ax
add ax,#32
inc bx
dec cx
bnz InitLoop
rts
Clear:
mov bx,@$7e00
mov cx,@1024
ClearLoop:
mov [bx],ax
inc bx
dec cx
bnz ClearLoop
rts
__endasm
};
end
`endif
endmodule

View File

@ -3,7 +3,7 @@
`include "hvsync_generator.v"
module RAM(clk, addr, din, dout, we);
module RAM_sync(clk, addr, din, dout, we);
parameter A = 10; // # of address bits
parameter D = 8; // # of data bits
@ -19,9 +19,31 @@ module RAM(clk, addr, din, dout, we);
always @(posedge clk) begin
if (we) // if write enabled
mem[addr] <= din; // write memory from din
dout <= mem[addr]; // read memory to dout
dout <= mem[addr]; // read memory to dout (sync)
end
endmodule
// RAM with a clocked write port and a combinational (asynchronous)
// read port: dout always reflects mem[addr] without waiting for a
// clock edge.
module RAM_async(clk, addr, din, dout, we);

  parameter A = 10; // address width in bits (default: 10)
  parameter D = 8;  // data width in bits (default: 8)

  localparam DEPTH = 1 << A; // number of words in the memory

  input          clk;  // write clock
  input  [A-1:0] addr; // address used for both read and write
  input  [D-1:0] din;  // data to store when we is high
  output [D-1:0] dout; // data currently stored at addr
  input          we;   // write enable

  reg [D-1:0] mem [0:DEPTH-1]; // storage: DEPTH words of D bits

  // asynchronous read: follows addr (and memory contents) directly
  assign dout = mem[addr];

  // synchronous write on the rising clock edge
  always @(posedge clk)
    if (we)
      mem[addr] <= din;

endmodule
`endif

View File

@ -32,7 +32,7 @@ module example_bitmap_rom(addr, data);
endmodule
module sprite_scanline_renderer(clk, reset, hpos, vpos, rgb,
ram_addr, ram_data,
ram_addr, ram_data, ram_busy,
rom_addr, rom_data);
parameter NB = 5;
@ -46,8 +46,10 @@ module sprite_scanline_renderer(clk, reset, hpos, vpos, rgb,
input [8:0] vpos;
output [3:0] rgb;
output [NB+1:0] ram_addr;
input [7:0] ram_data;
output [NB:0] ram_addr;
input [15:0] ram_data;
output ram_busy;
output [15:0] rom_addr;
input [15:0] rom_data;
@ -87,25 +89,24 @@ module sprite_scanline_renderer(clk, reset, hpos, vpos, rgb,
always @(posedge clk) begin
ram_busy <= 0;
// reset every frame, don't draw vpos >= 256
if (reset || vpos[8]) begin
// load sprites from RAM on line 260
// 8 cycles per sprite
if (vpos == 260 && hpos < N*8) begin
ram_busy <= 1;
case (hpos[2:0])
0: begin
ram_addr <= {load_index, 2'b00};
3: begin
ram_addr <= {load_index, 1'b0};
end
2: begin
sprite_xpos[load_index] <= ram_data;
ram_addr <= {load_index, 2'b01};
5: begin
sprite_xpos[load_index] <= ram_data[7:0];
sprite_ypos[load_index] <= ram_data[15:8];
ram_addr <= {load_index, 1'b1};
end
4: begin
sprite_ypos[load_index] <= ram_data;
ram_addr <= {load_index, 2'b10};
end
6: begin
sprite_attr[load_index] <= ram_data;
7: begin
sprite_attr[load_index] <= ram_data[7:0];
end
endcase
end
@ -202,13 +203,14 @@ module test_scanline_render_top(clk, reset, hsync, vsync, rgb);
.data(rom_data)
);
wire [6:0] ram_addr;
wire [7:0] ram_read;
reg [7:0] ram_write;
wire [5:0] ram_addr;
wire [15:0] ram_read;
reg [15:0] ram_write;
reg ram_we;
wire ram_busy;
// 128-byte RAM
RAM #(7,8) ram(
// 64-word RAM
RAM_sync #(6,16) ram(
.clk(clk),
.addr(ram_addr),
.dout(ram_read),
@ -224,6 +226,7 @@ module test_scanline_render_top(clk, reset, hsync, vsync, rgb);
.rgb(rgb),
.ram_addr(ram_addr),
.ram_data(ram_read),
.ram_busy(ram_busy),
.rom_addr(rom_addr),
.rom_data(rom_data)
);
@ -231,13 +234,13 @@ module test_scanline_render_top(clk, reset, hsync, vsync, rgb);
always @(posedge clk) begin
// wiggle sprites randomly once per frame
if (vpos == 256) begin
ram_addr <= hpos[8:2];
ram_addr <= hpos[7:2];
// 4 clocks per read/write cycle
if (!hpos[1]) begin
ram_we <= 0;
end else begin
ram_we <= 1;
ram_write <= ram_read + 8'(($random&3)-1);
ram_write <= ram_read + 16'(($random&3)-1);
end
end else
ram_we <= 0;

View File

@ -4,7 +4,7 @@
module tile_renderer(clk, reset, hpos, vpos, display_on,
rgb,
ram_addr, ram_read,
ram_addr, ram_read, ram_busy,
rom_addr, rom_data);
input clk, reset;
@ -14,12 +14,13 @@ module tile_renderer(clk, reset, hpos, vpos, display_on,
output [3:0] rgb;
output reg [15:0] ram_addr;
input [7:0] ram_read;
input [15:0] ram_read;
output reg ram_busy;
output [10:0] rom_addr;
input [7:0] rom_data;
reg [7:0] page_base = 0; // page table base (8 bits)
reg [7:0] page_base = 8'h7e; // page table base (8 bits)
reg [15:0] row_base; // row table base (16 bits)
wire [4:0] row = vpos[7:3]; // 5-bit row, vpos / 8
@ -29,35 +30,39 @@ module tile_renderer(clk, reset, hpos, vpos, display_on,
reg [7:0] char;
reg [7:0] attr;
reg [7:0] next_char;
reg [7:0] next_attr;
// tile ROM address
assign rom_addr = {char, yofs};
reg [15:0] row_buffer[0:31];
// lookup char and attr
always @(posedge clk)
if (hpos[8]) begin
always @(posedge clk) begin
// time to read a row?
if (vpos[2:0] == 7) begin
// read row_base from page table (2 bytes)
case (hpos[7:0])
// read row_base from page table (2 bytes)
// TODO: why 2 cycles?
0: ram_addr <= {page_base, row, 3'b000};
2: row_base[7:0] <= ram_read;
3: ram_addr <= {page_base, row, 3'b001};
5: row_base[15:8] <= ram_read;
186: ram_busy <= 1;
190: ram_addr <= {page_base, 3'b000, row};
192: row_base <= ram_read;
192+32: ram_busy <= 0;
endcase
end else begin
// load row of tile data from RAM
if (hpos >= 192 && hpos < 192+32) begin
ram_addr <= row_base + 16'(hpos[4:0]);
row_buffer[hpos[4:0]-2] <= ram_read;
end
end
// latch character data
if (hpos < 256) begin
case (hpos[2:0])
0: ram_addr <= row_base + 16'(col);
2: next_char <= ram_read;
3: ram_addr <= row_base + 16'(col) + 32;
5: next_attr <= ram_read;
7: begin
char <= next_char;
attr <= next_attr;
char <= row_buffer[col][7:0];
attr <= row_buffer[col][15:8];
end
endcase
end
end
// extract bit from ROM output
assign rgb = display_on
@ -77,12 +82,13 @@ module test_tilerender_top(clk, reset, hsync, vsync, rgb);
wire [8:0] vpos;
reg [15:0] ram_addr;
wire [7:0] ram_read;
reg [7:0] ram_write = 0;
wire [15:0] ram_read;
reg [15:0] ram_write = 0;
reg ram_writeenable = 0;
wire [10:0] rom_addr;
wire [7:0] rom_data;
wire ram_busy;
hvsync_generator hvsync_gen(
.clk(clk),
@ -95,7 +101,7 @@ module test_tilerender_top(clk, reset, hsync, vsync, rgb);
);
// RAM
RAM #(16,8) ram(
RAM_sync #(16,16) ram(
.clk(clk),
.dout(ram_read),
.din(ram_write),
@ -111,6 +117,7 @@ module test_tilerender_top(clk, reset, hsync, vsync, rgb);
.display_on(display_on),
.ram_addr(ram_addr),
.ram_read(ram_read),
.ram_busy(ram_busy),
.rom_addr(rom_addr),
.rom_data(rom_data),
.rgb(rgb)

View File

@ -23,6 +23,7 @@ var VERILOG_PRESETS = [
{id:'framebuffer.v', name:'Frame Buffer'},
{id:'tile_renderer.v', name:'Tile Renderer'},
{id:'sprite_scanline_renderer.v', name:'Sprite Scanline Renderer'},
{id:'cpu16.v', name:'16-Bit CPU'},
{id:'maze_game.v', name:'Maze Game'},
];
@ -211,6 +212,7 @@ var VerilogPlatform = function(mainElement, options) {
var scopeHeight = videoHeight;
var scopeImageData;
var sdata; // scope data
var module_name;
var yposlist = [];
var lasty = [];
@ -471,6 +473,7 @@ var VerilogPlatform = function(mainElement, options) {
var name = v.name;
ctx.fillStyle = name == inspect_sym ? "yellow" : "white";
name = name.replace(/__DOT__/g,'.');
name = name.replace(module_name+'.','');
ctx.textAlign = 'left';
ctx.fillStyle = "white";
shadowText(ctx, name, 1, yposlist[i]);
@ -578,6 +581,7 @@ var VerilogPlatform = function(mainElement, options) {
gen = new mod(base);
gen.__proto__ = base;
current_output = output;
module_name = output.name ? output.name.substr(1) : "top";
trace_ports = current_output.ports;
trace_signals = current_output.ports.concat(current_output.signals);
trace_index = 0;

View File

@ -1151,9 +1151,13 @@ function compileInlineASM(code, platform, options, errors, asmlines) {
if (i>0) s += ",";
s += 0|out[i];
}
asmlines = asmout.asmlines;
for (var i=0; i<asmlines.length; i++)
asmlines[i].line += firstline;
if (asmlines) {
var al = asmout.asmlines;
for (var i=0; i<al.length; i++) {
al[i].line += firstline;
asmlines.push(al[i]);
}
}
return s;
}
});
@ -1177,7 +1181,7 @@ function compileVerilator(code, platform, options) {
var FS = verilator_mod['FS'];
FS.writeFile(topmod+".v", code);
writeDependencies(options.dependencies, FS, errors, function(d, code) {
return compileInlineASM(code, platform, options, errors, asmlines);
return compileInlineASM(code, platform, options, errors, null);
});
starttime();
try {