diff --git a/presets/verilog/cpu16.v b/presets/verilog/cpu16.v index be0846e3..f4c7d168 100644 --- a/presets/verilog/cpu16.v +++ b/presets/verilog/cpu16.v @@ -1,8 +1,59 @@ `ifndef CPU16_H `define CPU16_H -// include ALU module -`include "cpu8.v" +// ALU operations +`define OP_ZERO 4'h0 +`define OP_LOAD_A 4'h1 +`define OP_INC 4'h2 +`define OP_DEC 4'h3 +`define OP_ASL 4'h4 +`define OP_LSR 4'h5 +`define OP_ROL 4'h6 +`define OP_ROR 4'h7 +`define OP_OR 4'h8 +`define OP_AND 4'h9 +`define OP_XOR 4'ha +`define OP_LOAD_B 4'hb +`define OP_ADD 4'hc +`define OP_SUB 4'hd +`define OP_ADC 4'he +`define OP_SBB 4'hf + + +module ALU(A, B, Y, aluop, carry); + + parameter N = 8; + input [N-1:0] A; + input [N-1:0] B; + output [N:0] Y; + input [3:0] aluop; + input carry; + + always @(*) + case (aluop) + // unary operations + `OP_ZERO: Y = 0; + `OP_LOAD_A: Y = {1'b0, A}; + `OP_INC: Y = A + 1; + `OP_DEC: Y = A - 1; + // unary operations that generate and/or use carry + `OP_ASL: Y = {A, 1'b0}; + `OP_LSR: Y = {A[0], 1'b0, A[N-1:1]}; + `OP_ROL: Y = {A, carry}; + `OP_ROR: Y = {A[0], carry, A[N-1:1]}; + // binary operations + `OP_OR: Y = {1'b0, A | B}; + `OP_AND: Y = {1'b0, A & B}; + `OP_XOR: Y = {1'b0, A ^ B}; + `OP_LOAD_B: Y = {1'b0, B}; + // binary operations that generate and/or use carry + `OP_ADD: Y = A + B; + `OP_SUB: Y = A - B; + `OP_ADC: Y = A + B + (carry?1:0); + `OP_SBB: Y = A - B - (carry?1:0); + endcase + +endmodule /* 00000aaa 0++++bbb operation A+B->A @@ -30,6 +81,9 @@ module CPU16(clk, reset, hold, busy, output [15:0] data_out; output write; + // wait state for RAM? + parameter RAM_WAIT = 1; + reg [15:0] regs[0:7]; // 8 16-bit registers reg [2:0] state; // CPU state @@ -51,6 +105,8 @@ module CPU16(clk, reset, hold, busy, localparam S_SELECT = 1; localparam S_DECODE = 2; localparam S_COMPUTE = 3; + localparam S_DECODE_WAIT = 4; + localparam S_COMPUTE_WAIT = 5; localparam SP = 6; // stack ptr = register 6 localparam IP = 7; // IP = register 7 @@ -86,22 +142,22 @@ module CPU16(clk, reset, hold, busy, busy <= 0; address <= regs[IP]; regs[IP] <= regs[IP] + 1; - state <= S_DECODE; + state <= RAM_WAIT ? S_DECODE_WAIT : S_DECODE; end end // state 2: read/decode opcode S_DECODE: begin + // default next state + state <= RAM_WAIT && data_in[11] ? S_COMPUTE_WAIT : S_COMPUTE; casez (data_in) // 00000aaa0++++bbb operation A+B->A 16'b00000???0???????: begin aluop <= data_in[6:3]; - state <= S_COMPUTE; end // 00001aaa01+++bbb operation A+[B]->A 16'b00001???01??????: begin address <= regs[data_in[2:0]]; aluop <= data_in[6:3]; - state <= S_COMPUTE; if (data_in[2:0] == SP) regs[SP] <= regs[SP] + 1; end @@ -110,18 +166,15 @@ module CPU16(clk, reset, hold, busy, address <= regs[IP]; regs[IP] <= regs[IP] + 1; aluop <= data_in[6:3]; - state <= S_COMPUTE; end // 11+++aaa######## immediate binary operation 16'b11??????????????: begin aluop <= data_in[14:11]; - state <= S_COMPUTE; end // 00101aaa######## load ZP memory 16'b00101???????????: begin address <= {8'b0, data_in[7:0]}; aluop <= `OP_LOAD_B; - state <= S_COMPUTE; end // 00110aaa######## store ZP memory 16'b00110???????????: begin @@ -134,7 +187,6 @@ module CPU16(clk, reset, hold, busy, 16'b01001???????????: begin address <= regs[data_in[2:0]] + 16'($signed(data_in[7:3])); aluop <= `OP_LOAD_B; - state <= S_COMPUTE; if (data_in[2:0] == SP) regs[SP] <= regs[SP] + 1; end @@ -152,11 +204,10 @@ module CPU16(clk, reset, hold, busy, address <= regs[IP]; regs[IP] <= regs[IP] + 1; aluop <= data_in[6:3]; - state <= S_COMPUTE; end - // 01110aaa00cccbbb store A -> [B+#], C -> IP + // 01110aaa00cccbbb store A -> [B], C -> IP 16'b01110???00??????: begin - address <= regs[data_in[2:0]] + 16'($signed(data_in[7:3])); + address <= regs[data_in[2:0]]; data_out <= regs[data_in[10:8]]; write <= 1; state <= S_SELECT; @@ -195,6 +246,13 @@ module CPU16(clk, reset, hold, busy, // repeat CPU loop state <= S_SELECT; end + // wait 1 cycle for RAM read + S_DECODE_WAIT: begin + state <= S_DECODE; + end + S_COMPUTE_WAIT : begin + state <= S_COMPUTE; + end endcase end @@ -212,7 +270,8 @@ module test_CPU16_top( output [15:0] IP, output zero, output carry, - output busy + output busy, + output [2:0] state ); reg [15:0] ram[0:65535]; @@ -221,6 +280,7 @@ module test_CPU16_top( assign IP = cpu.regs[7]; assign zero = cpu.zero; assign carry = cpu.carry; + assign state = cpu.state; CPU16 cpu( .clk(clk), @@ -237,11 +297,11 @@ module test_CPU16_top( ram[address_bus] <= from_cpu; end - always @(*) + always @(posedge clk) if (address_bus[15] == 0) - to_cpu = ram[address_bus]; + to_cpu <= ram[address_bus]; else - to_cpu = rom[address_bus[7:0]]; + to_cpu <= rom[address_bus[7:0]]; `ifdef EXT_INLINE_ASM initial begin @@ -250,13 +310,13 @@ module test_CPU16_top( .arch femto16 .org 0x8000 .len 256 + mov sp,@$6fff mov dx,@Fib jsr dx reset Fib: mov ax,#1 mov bx,#0 - mov sp,@$6fff Loop: mov cx,ax add ax,bx diff --git a/presets/verilog/maze_game.v b/presets/verilog/maze_game.v index 2e67354c..9f1be8ed 100644 --- a/presets/verilog/maze_game.v +++ b/presets/verilog/maze_game.v @@ -5,7 +5,6 @@ `include "sprite_scanline_renderer.v" `include "lfsr.v" `include "sound_generator.v" -`include "cpu8.v" `include "cpu16.v" module maze_game_top(clk, reset, hsync, vsync, rgb); @@ -19,14 +18,26 @@ module maze_game_top(clk, reset, hsync, vsync, rgb); wire [8:0] vpos; // video RAM bus - wire [7:0] vram_read; - reg [7:0] vram_write = 0; - reg vram_writeenable = 0; + wire [15:0] ram_read; + reg [15:0] ram_write = 0; + reg ram_writeenable = 0; // multiplex sprite and tile RAM wire sprite_ram_select = (vpos == 256); reg [15:0] tile_ram_addr; - reg [6:0] sprite_ram_addr; + reg [5:0] sprite_ram_addr; + wire tile_reading; + wire sprite_reading; + wire [14:0] mux_ram_addr; // 15-bit RAM access + + always @(*) + if (cpu_busy) begin + if (sprite_ram_select) + mux_ram_addr = {9'b1111111, sprite_ram_addr}; + else + mux_ram_addr = tile_ram_addr[14:0]; + end else + mux_ram_addr = cpu_ram_addr[14:0]; // tile and sprite ROM wire [10:0] tile_rom_addr; @@ -48,15 +59,13 @@ module maze_game_top(clk, reset, hsync, vsync, rgb); .vpos(vpos) ); - // video RAM (16k) - RAM #(14,8) vram( + // RAM (32k x 16 bits) + RAM_sync #(15,16) ram( .clk(clk), - .dout(vram_read), - .din(vram_write), - .addr(sprite_ram_select - ? {7'b0111111, sprite_ram_addr} - : tile_ram_addr[13:0]), - .we(vram_writeenable) + .dout(ram_read), + .din(ram_write), + .addr(mux_ram_addr), + .we(ram_writeenable) ); tile_renderer tile_gen( @@ -66,7 +75,8 @@ module maze_game_top(clk, reset, hsync, vsync, rgb); .vpos(vpos), .display_on(display_on), .ram_addr(tile_ram_addr), - .ram_read(vram_read), + .ram_read(ram_read), + .ram_busy(tile_reading), .rom_addr(tile_rom_addr), .rom_data(tile_rom_data), .rgb(tile_rgb) @@ -78,7 +88,8 @@ module maze_game_top(clk, reset, hsync, vsync, rgb); .hpos(hpos), .vpos(vpos), .ram_addr(sprite_ram_addr), - .ram_data(vram_read), + .ram_data(ram_read), + .ram_busy(sprite_reading), .rom_addr(sprite_rom_addr), .rom_data(sprite_rom_data), .rgb(sprite_rgb) @@ -93,34 +104,74 @@ module maze_game_top(clk, reset, hsync, vsync, rgb); .addr(sprite_rom_addr), .data(sprite_rom_data) ); - + + // sprites overlay tiles assign rgb = display_on ? (sprite_rgb>0 ? sprite_rgb : tile_rgb) : 0; - // CPU RAM (32k x 16 bits) - RAM #(15,16) mram( - .clk(clk), - .dout(cpuram_read), - .din(cpuram_write), - .addr(cpuram_addr[14:0]), - .we(cpuram_writeenable) - ); - - reg [15:0] cpuram_read; - reg [15:0] cpuram_write; - reg [15:0] cpuram_addr; - reg cpuram_writeenable; + // CPU + reg cpu_hold = 0; + wire cpu_busy; + wire [15:0] cpu_ram_addr; wire busy; + wire [15:0] cpu_bus; + + assign cpu_bus = cpu_ram_addr[15] + ? rom[cpu_ram_addr[9:0]] + : ram_read; CPU16 cpu( .clk(clk), .reset(reset), - .hold(0), - .busy(busy), - .address(cpuram_addr), - .data_in(cpuram_read), - .data_out(cpuram_write), - .write(cpuram_writeenable)); + .hold(tile_reading | sprite_reading), + .busy(cpu_busy), + .address(cpu_ram_addr), + .data_in(cpu_bus), + .data_out(ram_write), + .write(ram_writeenable)); + + reg [15:0] rom[0:1023]; +`ifdef EXT_INLINE_ASM + initial begin + rom = '{ + __asm +.arch femto16 +.org 0x8000 +.len 1024 + mov sp,@$6fff + mov dx,@Init + jsr dx + mov ax,#0 + mov dx,@Clear + jsr dx + reset +Init: + mov ax,@$6000 ; screen buffer + mov bx,@$7e00 ; page table start + mov cx,#32 ; 32 rows +InitLoop: + mov [bx],ax + mov [ax],ax + add ax,#32 + inc bx + dec cx + bnz InitLoop + rts +Clear: + mov bx,@$7e00 + mov cx,@1024 +ClearLoop: + mov [bx],ax + inc bx + dec cx + bnz ClearLoop + + rts + __endasm + }; + end +`endif + endmodule diff --git a/presets/verilog/ram.v b/presets/verilog/ram.v index 302a0cee..d069fe66 100644 --- a/presets/verilog/ram.v +++ b/presets/verilog/ram.v @@ -3,7 +3,7 @@ `include "hvsync_generator.v" -module RAM(clk, addr, din, dout, we); +module RAM_sync(clk, addr, din, dout, we); parameter A = 10; // # of address bits parameter D = 8; // # of data bits @@ -19,9 +19,31 @@ module RAM(clk, addr, din, dout, we); always @(posedge clk) begin if (we) // if write enabled mem[addr] <= din; // write memory from din - dout <= mem[addr]; // read memory to dout + dout <= mem[addr]; // read memory to dout (sync) end endmodule +module RAM_async(clk, addr, din, dout, we); + + parameter A = 10; // # of address bits + parameter D = 8; // # of data bits + + input clk; // clock + input [A-1:0] addr; // 10-bit address + input [D-1:0] din; // 8-bit data input + output [D-1:0] dout; // 8-bit data output + input we; // write enable + + reg [D-1:0] mem [0:(1<= 256 if (reset || vpos[8]) begin // load sprites from RAM on line 260 // 8 cycles per sprite if (vpos == 260 && hpos < N*8) begin + ram_busy <= 1; case (hpos[2:0]) - 0: begin - ram_addr <= {load_index, 2'b00}; + 3: begin + ram_addr <= {load_index, 1'b0}; end - 2: begin - sprite_xpos[load_index] <= ram_data; - ram_addr <= {load_index, 2'b01}; + 5: begin + sprite_xpos[load_index] <= ram_data[7:0]; + sprite_ypos[load_index] <= ram_data[15:8]; + ram_addr <= {load_index, 1'b1}; end - 4: begin - sprite_ypos[load_index] <= ram_data; - ram_addr <= {load_index, 2'b10}; - end - 6: begin - sprite_attr[load_index] <= ram_data; + 7: begin + sprite_attr[load_index] <= ram_data[7:0]; end endcase end @@ -202,13 +203,14 @@ module test_scanline_render_top(clk, reset, hsync, vsync, rgb); .data(rom_data) ); - wire [6:0] ram_addr; - wire [7:0] ram_read; - reg [7:0] ram_write; + wire [5:0] ram_addr; + wire [15:0] ram_read; + reg [15:0] ram_write; reg ram_we; + wire ram_busy; - // 128-byte RAM - RAM #(7,8) ram( + // 64-word RAM + RAM_sync #(6,16) ram( .clk(clk), .addr(ram_addr), .dout(ram_read), @@ -224,6 +226,7 @@ module test_scanline_render_top(clk, reset, hsync, vsync, rgb); .rgb(rgb), .ram_addr(ram_addr), .ram_data(ram_read), + .ram_busy(ram_busy), .rom_addr(rom_addr), .rom_data(rom_data) ); @@ -231,13 +234,13 @@ module test_scanline_render_top(clk, reset, hsync, vsync, rgb); always @(posedge clk) begin // wiggle sprites randomly once per frame if (vpos == 256) begin - ram_addr <= hpos[8:2]; + ram_addr <= hpos[7:2]; // 4 clocks per read/write cycle if (!hpos[1]) begin ram_we <= 0; end else begin ram_we <= 1; - ram_write <= ram_read + 8'(($random&3)-1); + ram_write <= ram_read + 16'(($random&3)-1); end end else ram_we <= 0; diff --git a/presets/verilog/tile_renderer.v b/presets/verilog/tile_renderer.v index b25873ab..d50d27d9 100644 --- a/presets/verilog/tile_renderer.v +++ b/presets/verilog/tile_renderer.v @@ -4,7 +4,7 @@ module tile_renderer(clk, reset, hpos, vpos, display_on, rgb, - ram_addr, ram_read, + ram_addr, ram_read, ram_busy, rom_addr, rom_data); input clk, reset; @@ -14,12 +14,13 @@ module tile_renderer(clk, reset, hpos, vpos, display_on, output [3:0] rgb; output reg [15:0] ram_addr; - input [7:0] ram_read; + input [15:0] ram_read; + output reg ram_busy; output [10:0] rom_addr; input [7:0] rom_data; - reg [7:0] page_base = 0; // page table base (8 bits) + reg [7:0] page_base = 8'h7e; // page table base (8 bits) reg [15:0] row_base; // row table base (16 bits) wire [4:0] row = vpos[7:3]; // 5-bit row, vpos / 8 @@ -29,35 +30,39 @@ module tile_renderer(clk, reset, hpos, vpos, display_on, reg [7:0] char; reg [7:0] attr; - reg [7:0] next_char; - reg [7:0] next_attr; // tile ROM address assign rom_addr = {char, yofs}; + + reg [15:0] row_buffer[0:31]; // lookup char and attr - always @(posedge clk) - if (hpos[8]) begin + always @(posedge clk) begin + // time to read a row? + if (vpos[2:0] == 7) begin + // read row_base from page table (2 bytes) case (hpos[7:0]) - // read row_base from page table (2 bytes) - // TODO: why 2 cycles? - 0: ram_addr <= {page_base, row, 3'b000}; - 2: row_base[7:0] <= ram_read; - 3: ram_addr <= {page_base, row, 3'b001}; - 5: row_base[15:8] <= ram_read; + 186: ram_busy <= 1; + 190: ram_addr <= {page_base, 3'b000, row}; + 192: row_base <= ram_read; + 192+32: ram_busy <= 0; endcase - end else begin + // load row of tile data from RAM + if (hpos >= 192 && hpos < 192+32) begin + ram_addr <= row_base + 16'(hpos[4:0]); + row_buffer[hpos[4:0]-2] <= ram_read; + end + end + // latch character data + if (hpos < 256) begin case (hpos[2:0]) - 0: ram_addr <= row_base + 16'(col); - 2: next_char <= ram_read; - 3: ram_addr <= row_base + 16'(col) + 32; - 5: next_attr <= ram_read; 7: begin - char <= next_char; - attr <= next_attr; + char <= row_buffer[col][7:0]; + attr <= row_buffer[col][15:8]; end endcase end + end // extract bit from ROM output assign rgb = display_on @@ -77,12 +82,13 @@ module test_tilerender_top(clk, reset, hsync, vsync, rgb); wire [8:0] vpos; reg [15:0] ram_addr; - wire [7:0] ram_read; - reg [7:0] ram_write = 0; + wire [15:0] ram_read; + reg [15:0] ram_write = 0; reg ram_writeenable = 0; wire [10:0] rom_addr; wire [7:0] rom_data; + wire ram_busy; hvsync_generator hvsync_gen( .clk(clk), @@ -95,7 +101,7 @@ module test_tilerender_top(clk, reset, hsync, vsync, rgb); ); // RAM - RAM #(16,8) ram( + RAM_sync #(16,16) ram( .clk(clk), .dout(ram_read), .din(ram_write), @@ -111,6 +117,7 @@ module test_tilerender_top(clk, reset, hsync, vsync, rgb); .display_on(display_on), .ram_addr(ram_addr), .ram_read(ram_read), + .ram_busy(ram_busy), .rom_addr(rom_addr), .rom_data(rom_data), .rgb(rgb) diff --git a/src/platform/verilog.js b/src/platform/verilog.js index fbaa5b4e..beefbc93 100644 --- a/src/platform/verilog.js +++ b/src/platform/verilog.js @@ -23,6 +23,7 @@ var VERILOG_PRESETS = [ {id:'framebuffer.v', name:'Frame Buffer'}, {id:'tile_renderer.v', name:'Tile Renderer'}, {id:'sprite_scanline_renderer.v', name:'Sprite Scanline Renderer'}, + {id:'cpu16.v', name:'16-Bit CPU'}, {id:'maze_game.v', name:'Maze Game'}, ]; @@ -211,6 +212,7 @@ var VerilogPlatform = function(mainElement, options) { var scopeHeight = videoHeight; var scopeImageData; var sdata; // scope data + var module_name; var yposlist = []; var lasty = []; @@ -471,6 +473,7 @@ var VerilogPlatform = function(mainElement, options) { var name = v.name; ctx.fillStyle = name == inspect_sym ? "yellow" : "white"; name = name.replace(/__DOT__/g,'.'); + name = name.replace(module_name+'.',''); ctx.textAlign = 'left'; ctx.fillStyle = "white"; shadowText(ctx, name, 1, yposlist[i]); @@ -578,6 +581,7 @@ var VerilogPlatform = function(mainElement, options) { gen = new mod(base); gen.__proto__ = base; current_output = output; + module_name = output.name ? output.name.substr(1) : "top"; trace_ports = current_output.ports; trace_signals = current_output.ports.concat(current_output.signals); trace_index = 0; diff --git a/src/worker/workermain.js b/src/worker/workermain.js index 882cc68a..45fc0177 100644 --- a/src/worker/workermain.js +++ b/src/worker/workermain.js @@ -1151,9 +1151,13 @@ function compileInlineASM(code, platform, options, errors, asmlines) { if (i>0) s += ","; s += 0|out[i]; } - asmlines = asmout.asmlines; - for (var i=0; i