mirror of
https://github.com/sehugg/8bitworkshop.git
synced 2026-04-25 18:47:56 +00:00
verilog: femtorv32 RISC-V module, fixes to assembler
This commit is contained in:
@@ -0,0 +1,530 @@
|
||||
/*******************************************************************/
|
||||
// FemtoRV32, a collection of minimalistic RISC-V RV32 cores.
|
||||
// This version: The "Quark", the most elementary version of FemtoRV32.
|
||||
// A single VERILOG file, compact & understandable code.
|
||||
// (200 lines of code, 400 lines counting comments)
|
||||
//
|
||||
// Instruction set: RV32I + RDCYCLES
|
||||
//
|
||||
// Parameters:
|
||||
// Reset address can be defined using RESET_ADDR (default is 0).
|
||||
//
|
||||
// The ADDR_WIDTH parameter lets you define the width of the internal
|
||||
// address bus (and address computation logic).
|
||||
//
|
||||
// Macros:
|
||||
// optionally one may define NRV_IS_IO_ADDR(addr), that is supposed to:
|
||||
// evaluate to 1 if addr is in mapped IO space,
|
||||
// evaluate to 0 otherwise
|
||||
// (additional wait states are used when in IO space).
|
||||
// If left undefined, wait states are always used.
|
||||
//
|
||||
// NRV_COUNTER_WIDTH may be defined to reduce the number of bits used
|
||||
// by the ticks counter. If not defined, a 32-bits counter is generated.
|
||||
// (reducing its width may be useful for space-constrained designs).
|
||||
//
|
||||
// Bruno Levy, Matthias Koch, 2020-2021
|
||||
/*******************************************************************/
|
||||
|
||||
// Firmware generation flags for this processor
|
||||
`define NRV_ARCH "rv32i"
|
||||
`define NRV_ABI "ilp32"
|
||||
`define NRV_OPTIMIZE "-Os"
|
||||
|
||||
module FemtoRV32(
|
||||
input clk,
|
||||
|
||||
output [31:0] mem_addr, // address bus
|
||||
output [31:0] mem_wdata, // data to be written
|
||||
output [3:0] mem_wmask, // write mask for the 4 bytes of each word
|
||||
input [31:0] mem_rdata, // input lines for both data and instr
|
||||
output mem_rstrb, // active to initiate memory read (used by IO)
|
||||
input mem_rbusy, // asserted if memory is busy reading value
|
||||
input mem_wbusy, // asserted if memory is busy writing value
|
||||
|
||||
input reset // set to 1 to reset the processor (active high, see the state machine)
|
||||
);
|
||||
|
||||
parameter RESET_ADDR = 32'h00000000;
|
||||
parameter ADDR_WIDTH = 24;
|
||||
|
||||
/***************************************************************************/
|
||||
// Instruction decoding.
|
||||
/***************************************************************************/
|
||||
|
||||
// Extracts rd,rs1,rs2,funct3,imm and opcode from instruction.
|
||||
// Reference: Table page 104 of:
|
||||
// https://content.riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf
|
||||
|
||||
// The destination register
|
||||
wire [4:0] rdId = instr[11:7];
|
||||
|
||||
// The ALU function, decoded in 1-hot form (doing so reduces LUT count)
|
||||
// It is used as follows: funct3Is[val] <=> funct3 == val
|
||||
(* onehot *)
|
||||
wire [7:0] funct3Is = 8'b00000001 << instr[14:12];
|
||||
|
||||
// The five immediate formats, see RiscV reference (link above), Fig. 2.4 p. 12
|
||||
wire [31:0] Uimm = { instr[31], instr[30:12], {12{1'b0}}};
|
||||
wire [31:0] Iimm = {{21{instr[31]}}, instr[30:20]};
|
||||
/* verilator lint_off UNUSED */ // MSBs of SBJimms are not used by addr adder.
|
||||
wire [31:0] Simm = {{21{instr[31]}}, instr[30:25],instr[11:7]};
|
||||
wire [31:0] Bimm = {{20{instr[31]}}, instr[7],instr[30:25],instr[11:8],1'b0};
|
||||
wire [31:0] Jimm = {{12{instr[31]}}, instr[19:12],instr[20],instr[30:21],1'b0};
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
// Base RISC-V (RV32I) has only 10 different instructions !
|
||||
wire isLoad = (instr[6:2] == 5'b00000); // rd <- mem[rs1+Iimm]
|
||||
wire isALUimm = (instr[6:2] == 5'b00100); // rd <- rs1 OP Iimm
|
||||
wire isAUIPC = (instr[6:2] == 5'b00101); // rd <- PC + Uimm
|
||||
wire isStore = (instr[6:2] == 5'b01000); // mem[rs1+Simm] <- rs2
|
||||
wire isALUreg = (instr[6:2] == 5'b01100); // rd <- rs1 OP rs2
|
||||
wire isLUI = (instr[6:2] == 5'b01101); // rd <- Uimm
|
||||
wire isBranch = (instr[6:2] == 5'b11000); // if(rs1 OP rs2) PC<-PC+Bimm
|
||||
wire isJALR = (instr[6:2] == 5'b11001); // rd <- PC+4; PC<-rs1+Iimm
|
||||
wire isJAL = (instr[6:2] == 5'b11011); // rd <- PC+4; PC<-PC+Jimm
|
||||
wire isSYSTEM = (instr[6:2] == 5'b11100); // rd <- cycles
|
||||
|
||||
wire isALU = isALUimm | isALUreg;
|
||||
|
||||
/***************************************************************************/
|
||||
// The register file.
|
||||
/***************************************************************************/
|
||||
|
||||
reg [31:0] rs1;
|
||||
reg [31:0] rs2;
|
||||
reg [31:0] registerFile [31:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (writeBack)
|
||||
if (rdId != 0)
|
||||
registerFile[rdId] <= writeBackData;
|
||||
end
|
||||
|
||||
/***************************************************************************/
|
||||
// The ALU. Does operations and tests combinatorially, except shifts.
|
||||
/***************************************************************************/
|
||||
|
||||
// First ALU source, always rs1
|
||||
wire [31:0] aluIn1 = rs1;
|
||||
|
||||
// Second ALU source, depends on opcode:
|
||||
// ALUreg, Branch: rs2
|
||||
// ALUimm, Load, JALR: Iimm
|
||||
wire [31:0] aluIn2 = isALUreg | isBranch ? rs2 : Iimm;
|
||||
|
||||
// The adder is used by both arithmetic instructions and JALR.
|
||||
wire [31:0] aluPlus = aluIn1 + aluIn2;
|
||||
|
||||
// Use a single 33 bits subtract to do subtraction and all comparisons
|
||||
// (trick borrowed from swapforth/J1)
|
||||
wire [32:0] aluMinus = {1'b1, ~aluIn2} + {1'b0,aluIn1} + 33'b1;
|
||||
wire LT = (aluIn1[31] ^ aluIn2[31]) ? aluIn1[31] : aluMinus[32];
|
||||
wire LTU = aluMinus[32];
|
||||
wire EQ = (aluMinus[31:0] == 0);
|
||||
|
||||
/***************************************************************************/
|
||||
|
||||
// Use the same shifter both for left and right shifts by
|
||||
// applying bit reversal
|
||||
|
||||
wire [31:0] shifter_in = funct3Is[1] ?
|
||||
{aluIn1[ 0], aluIn1[ 1], aluIn1[ 2], aluIn1[ 3], aluIn1[ 4], aluIn1[ 5],
|
||||
aluIn1[ 6], aluIn1[ 7], aluIn1[ 8], aluIn1[ 9], aluIn1[10], aluIn1[11],
|
||||
aluIn1[12], aluIn1[13], aluIn1[14], aluIn1[15], aluIn1[16], aluIn1[17],
|
||||
aluIn1[18], aluIn1[19], aluIn1[20], aluIn1[21], aluIn1[22], aluIn1[23],
|
||||
aluIn1[24], aluIn1[25], aluIn1[26], aluIn1[27], aluIn1[28], aluIn1[29],
|
||||
aluIn1[30], aluIn1[31]} : aluIn1;
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
wire [31:0] shifter =
|
||||
$signed({instr[30] & aluIn1[31], shifter_in}) >>> aluIn2[4:0];
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
wire [31:0] leftshift = {
|
||||
shifter[ 0], shifter[ 1], shifter[ 2], shifter[ 3], shifter[ 4],
|
||||
shifter[ 5], shifter[ 6], shifter[ 7], shifter[ 8], shifter[ 9],
|
||||
shifter[10], shifter[11], shifter[12], shifter[13], shifter[14],
|
||||
shifter[15], shifter[16], shifter[17], shifter[18], shifter[19],
|
||||
shifter[20], shifter[21], shifter[22], shifter[23], shifter[24],
|
||||
shifter[25], shifter[26], shifter[27], shifter[28], shifter[29],
|
||||
shifter[30], shifter[31]};
|
||||
|
||||
/***************************************************************************/
|
||||
|
||||
// Notes:
|
||||
// - instr[30] is 1 for SUB and 0 for ADD
|
||||
// - for SUB, need to test also instr[5] to discriminate ADDI:
|
||||
// (1 for ADD/SUB, 0 for ADDI, and Iimm used by ADDI overlaps bit 30 !)
|
||||
// - instr[30] is 1 for SRA (do sign extension) and 0 for SRL
|
||||
|
||||
wire [31:0] aluOut =
|
||||
(funct3Is[0] ? instr[30] & instr[5] ? aluMinus[31:0] : aluPlus : 32'b0) |
|
||||
(funct3Is[1] ? leftshift : 32'b0) |
|
||||
(funct3Is[2] ? {31'b0, LT} : 32'b0) |
|
||||
(funct3Is[3] ? {31'b0, LTU} : 32'b0) |
|
||||
(funct3Is[4] ? aluIn1 ^ aluIn2 : 32'b0) |
|
||||
(funct3Is[5] ? shifter : 32'b0) |
|
||||
(funct3Is[6] ? aluIn1 | aluIn2 : 32'b0) |
|
||||
(funct3Is[7] ? aluIn1 & aluIn2 : 32'b0) ;
|
||||
|
||||
/***************************************************************************/
|
||||
// The predicate for conditional branches.
|
||||
/***************************************************************************/
|
||||
|
||||
wire predicate =
|
||||
funct3Is[0] & EQ | // BEQ
|
||||
funct3Is[1] & !EQ | // BNE
|
||||
funct3Is[4] & LT | // BLT
|
||||
funct3Is[5] & !LT | // BGE
|
||||
funct3Is[6] & LTU | // BLTU
|
||||
funct3Is[7] & !LTU ; // BGEU
|
||||
|
||||
/***************************************************************************/
|
||||
// Program counter and branch target computation.
|
||||
/***************************************************************************/
|
||||
|
||||
reg [ADDR_WIDTH-1:0] PC; // The program counter.
|
||||
reg [31:2] instr; // Latched instruction. Note that bits 0 and 1 are
|
||||
// ignored (not used in RV32I base instr set).
|
||||
|
||||
wire [ADDR_WIDTH-1:0] PCplus4 = PC + 4;
|
||||
|
||||
// An adder used to compute branch address, JAL address and AUIPC.
|
||||
// branch->PC+Bimm AUIPC->PC+Uimm JAL->PC+Jimm
|
||||
// Equivalent to PCplusImm = PC + (isJAL ? Jimm : isAUIPC ? Uimm : Bimm)
|
||||
wire [ADDR_WIDTH-1:0] PCplusImm = PC + ( instr[3] ? Jimm[ADDR_WIDTH-1:0] :
|
||||
instr[4] ? Uimm[ADDR_WIDTH-1:0] :
|
||||
Bimm[ADDR_WIDTH-1:0] );
|
||||
|
||||
// A separate adder to compute the destination of load/store.
|
||||
// testing instr[5] is equivalent to testing isStore in this context.
|
||||
wire [ADDR_WIDTH-1:0] loadstore_addr = rs1[ADDR_WIDTH-1:0] +
|
||||
(instr[5] ? Simm[ADDR_WIDTH-1:0] : Iimm[ADDR_WIDTH-1:0]);
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
// internal address registers and cycles counter may have less than
|
||||
// 32 bits, so we deactivate width test for mem_addr and writeBackData
|
||||
|
||||
wire [ADDR_WIDTH-1:0] PC_new =
|
||||
isJALR ? {aluPlus[ADDR_WIDTH-1:1],1'b0} :
|
||||
jumpToPCplusImm ? PCplusImm :
|
||||
PCplus4;
|
||||
|
||||
assign mem_addr = state[WAIT_INSTR_bit] | state[FETCH_INSTR_bit] ? PC :
|
||||
state[EXECUTE_bit] & ~isLoad & ~isStore ? PC_new :
|
||||
loadstore_addr ;
|
||||
|
||||
/***************************************************************************/
|
||||
// The value written back to the register file.
|
||||
/***************************************************************************/
|
||||
|
||||
wire [31:0] writeBackData =
|
||||
(isSYSTEM ? cycles : 32'b0) | // SYSTEM
|
||||
(isLUI ? Uimm : 32'b0) | // LUI
|
||||
(isALU ? aluOut : 32'b0) | // ALUreg, ALUimm
|
||||
(isAUIPC ? PCplusImm : 32'b0) | // AUIPC
|
||||
(isJALR | isJAL ? PCplus4 : 32'b0) | // JAL, JALR
|
||||
(isLoad ? LOAD_data : 32'b0) ; // Load
|
||||
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
|
||||
/***************************************************************************/
|
||||
// LOAD/STORE
|
||||
/***************************************************************************/
|
||||
|
||||
// All memory accesses are aligned on 32 bits boundary. For this
|
||||
// reason, we need some circuitry that does unaligned halfword
|
||||
// and byte load/store, based on:
|
||||
// - funct3[1:0]: 00->byte 01->halfword 10->word
|
||||
// - mem_addr[1:0]: indicates which byte/halfword is accessed
|
||||
|
||||
wire mem_byteAccess = instr[13:12] == 2'b00; // funct3[1:0] == 2'b00;
|
||||
wire mem_halfwordAccess = instr[13:12] == 2'b01; // funct3[1:0] == 2'b01;
|
||||
|
||||
// LOAD, in addition to funct3[1:0], LOAD depends on:
|
||||
// - funct3[2] (instr[14]): 0->do sign expansion 1->no sign expansion
|
||||
|
||||
wire LOAD_sign =
|
||||
!instr[14] & (mem_byteAccess ? LOAD_byte[7] : LOAD_halfword[15]);
|
||||
|
||||
wire [31:0] LOAD_data =
|
||||
mem_byteAccess ? {{24{LOAD_sign}}, LOAD_byte} :
|
||||
mem_halfwordAccess ? {{16{LOAD_sign}}, LOAD_halfword} :
|
||||
mem_rdata ;
|
||||
|
||||
wire [15:0] LOAD_halfword =
|
||||
loadstore_addr[1] ? mem_rdata[31:16] : mem_rdata[15:0];
|
||||
|
||||
wire [7:0] LOAD_byte =
|
||||
loadstore_addr[0] ? LOAD_halfword[15:8] : LOAD_halfword[7:0];
|
||||
|
||||
// STORE
|
||||
|
||||
assign mem_wdata[ 7: 0] = rs2[7:0];
|
||||
assign mem_wdata[15: 8] = loadstore_addr[0] ? rs2[7:0] : rs2[15: 8];
|
||||
assign mem_wdata[23:16] = loadstore_addr[1] ? rs2[7:0] : rs2[23:16];
|
||||
assign mem_wdata[31:24] = loadstore_addr[0] ? rs2[7:0] :
|
||||
loadstore_addr[1] ? rs2[15:8] : rs2[31:24];
|
||||
|
||||
// The memory write mask:
|
||||
// 1111 if writing a word
|
||||
// 0011 or 1100 if writing a halfword
|
||||
// (depending on loadstore_addr[1])
|
||||
// 0001, 0010, 0100 or 1000 if writing a byte
|
||||
// (depending on loadstore_addr[1:0])
|
||||
|
||||
wire [3:0] STORE_wmask =
|
||||
mem_byteAccess ?
|
||||
(loadstore_addr[1] ?
|
||||
(loadstore_addr[0] ? 4'b1000 : 4'b0100) :
|
||||
(loadstore_addr[0] ? 4'b0010 : 4'b0001)
|
||||
) :
|
||||
mem_halfwordAccess ?
|
||||
(loadstore_addr[1] ? 4'b1100 : 4'b0011) :
|
||||
4'b1111;
|
||||
|
||||
/*************************************************************************/
|
||||
// And, last but not least, the state machine.
|
||||
/*************************************************************************/
|
||||
|
||||
localparam FETCH_INSTR_bit = 0;
|
||||
localparam WAIT_INSTR_bit = 1;
|
||||
localparam EXECUTE_bit = 2;
|
||||
localparam WAIT_ALU_OR_MEM_bit = 3;
|
||||
localparam NB_STATES = 4;
|
||||
|
||||
localparam FETCH_INSTR = 1 << FETCH_INSTR_bit;
|
||||
localparam WAIT_INSTR = 1 << WAIT_INSTR_bit;
|
||||
localparam EXECUTE = 1 << EXECUTE_bit;
|
||||
localparam WAIT_ALU_OR_MEM = 1 << WAIT_ALU_OR_MEM_bit;
|
||||
|
||||
(* onehot *)
|
||||
reg [NB_STATES-1:0] state;
|
||||
|
||||
// The signals (internal and external) that are determined
|
||||
// combinatorially from state and other signals.
|
||||
|
||||
// register write-back enable.
|
||||
wire writeBack = ~(isBranch | isStore ) &
|
||||
(state[EXECUTE_bit] | state[WAIT_ALU_OR_MEM_bit]);
|
||||
|
||||
// The memory-read signal.
|
||||
assign mem_rstrb = state[EXECUTE_bit] & ~isStore | state[FETCH_INSTR_bit];
|
||||
|
||||
// The mask for memory-write.
|
||||
assign mem_wmask = {4{state[EXECUTE_bit] & isStore}} & STORE_wmask;
|
||||
|
||||
wire jumpToPCplusImm = isJAL | (isBranch & predicate);
|
||||
`ifdef NRV_IS_IO_ADDR
|
||||
wire needToWait = isLoad |
|
||||
isStore & `NRV_IS_IO_ADDR(mem_addr) ;
|
||||
`else
|
||||
wire needToWait = isLoad | isStore ;
|
||||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
// Handle reset (high) signal
|
||||
if(reset) begin
|
||||
state <= WAIT_ALU_OR_MEM; // Just waiting for !mem_wbusy
|
||||
PC <= RESET_ADDR[ADDR_WIDTH-1:0];
|
||||
end else
|
||||
|
||||
// See note [1] at the end of this file.
|
||||
(* parallel_case *)
|
||||
case(1'b1)
|
||||
|
||||
state[WAIT_INSTR_bit]: begin
|
||||
if(!mem_rbusy) begin // may be high when executing from SPI flash
|
||||
rs1 <= registerFile[mem_rdata[19:15]];
|
||||
rs2 <= registerFile[mem_rdata[24:20]];
|
||||
instr <= mem_rdata[31:2]; // Bits 0 and 1 are ignored (see
|
||||
state <= EXECUTE; // also the declaration of instr).
|
||||
end
|
||||
end
|
||||
|
||||
state[EXECUTE_bit]: begin
|
||||
PC <= PC_new;
|
||||
state <= needToWait ? WAIT_ALU_OR_MEM : WAIT_INSTR;
|
||||
end
|
||||
|
||||
state[WAIT_ALU_OR_MEM_bit]: begin
|
||||
if(!mem_rbusy & !mem_wbusy) state <= FETCH_INSTR;
|
||||
end
|
||||
|
||||
default: begin // FETCH_INSTR
|
||||
state <= WAIT_INSTR;
|
||||
end
|
||||
|
||||
endcase
|
||||
end
|
||||
|
||||
/***************************************************************************/
|
||||
// Cycle counter
|
||||
/***************************************************************************/
|
||||
|
||||
`ifdef NRV_COUNTER_WIDTH
|
||||
reg [`NRV_COUNTER_WIDTH-1:0] cycles;
|
||||
`else
|
||||
reg [31:0] cycles;
|
||||
`endif
|
||||
always @(posedge clk) cycles <= cycles + 1;
|
||||
|
||||
`ifdef BENCH
|
||||
initial begin
|
||||
cycles = 0;
|
||||
registerFile[0] = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
/*****************************************************************************/
|
||||
// Notes:
|
||||
//
|
||||
// [1] About the "reverse case" statement, also used in Claire Wolf's picorv32:
|
||||
// It is just a cleaner way of writing a series of cascaded if() statements,
|
||||
// To understand it, think about the case statement *in general* as follows:
|
||||
// case (expr)
|
||||
// val_1: statement_1
|
||||
// val_2: statement_2
|
||||
// ... val_n: statement_n
|
||||
// endcase
|
||||
// The first statement_i such that expr == val_i is executed.
|
||||
// Now if expr is 1'b1:
|
||||
// case (1'b1)
|
||||
// cond_1: statement_1
|
||||
// cond_2: statement_2
|
||||
// ... cond_n: statement_n
|
||||
// endcase
|
||||
// It is *exactly the same thing*, the first statement_i such that
|
||||
// expr == cond_i is executed (that is, such that 1'b1 == cond_i,
|
||||
// in other words, such that cond_i is true)
|
||||
// More on this:
|
||||
// https://stackoverflow.com/questions/15418636/case-statement-in-verilog
|
||||
//
|
||||
// [2] state uses 1-hot encoding (at any time, state has only one bit set to 1).
|
||||
// It uses a larger number of bits (one bit per state), but often results in
|
||||
// a both more compact (fewer LUTs) and faster state machine.
|
||||
|
||||
// https://github.com/BrunoLevy/learn-fpga/blob/master/FemtoRV/RTL/PROCESSOR/femtorv32_quark_bicycle.v
|
||||
|
||||
|
||||
`ifdef TOPMOD__test_FemtoRV32_top
|
||||
|
||||
module test_FemtoRV32_top(
|
||||
input clk,
|
||||
input reset,
|
||||
output [31:0] address_bus,
|
||||
output [31:0] to_cpu,
|
||||
output [31:0] from_cpu,
|
||||
output [3:0] write_mask,
|
||||
output write_strobe,
|
||||
output [23:0] PC,
|
||||
output [3:0] state_bits
|
||||
);
|
||||
|
||||
// Memory arrays
|
||||
reg [31:0] ram[0:4095]; // 16KB RAM array; ram_sel only decodes 0x0000-0x0FFF, so only the first 4KB is reachable
|
||||
reg [31:0] rom[0:255]; // 1KB ROM at 0x1000, indexed by mem_addr[9:2] (repeats across the 0x1000-0x1FFF window)
|
||||
|
||||
// Memory read data
|
||||
reg [31:0] mem_rdata;
|
||||
|
||||
// Memory busy signals (always ready for this simple testbench)
|
||||
wire mem_rbusy = 0;
|
||||
wire mem_wbusy = 0;
|
||||
|
||||
// CPU interface wires
|
||||
wire [31:0] mem_addr;
|
||||
wire [31:0] mem_wdata;
|
||||
wire [3:0] mem_wmask;
|
||||
wire mem_rstrb;
|
||||
|
||||
// Expose signals for debugging
|
||||
assign address_bus = mem_addr;
|
||||
assign from_cpu = mem_wdata;
|
||||
assign write_mask = mem_wmask;
|
||||
assign write_strobe = |mem_wmask; // Any write mask bit set means write
|
||||
assign to_cpu = mem_rdata;
|
||||
assign PC = cpu.PC;
|
||||
assign state_bits = cpu.state[3:0];
|
||||
|
||||
// Instantiate the FemtoRV32 CPU
|
||||
FemtoRV32 #(
|
||||
.RESET_ADDR(32'h00001000), // Start from ROM area
|
||||
.ADDR_WIDTH(24)
|
||||
) cpu (
|
||||
.clk(clk),
|
||||
.mem_addr(mem_addr),
|
||||
.mem_wdata(mem_wdata),
|
||||
.mem_wmask(mem_wmask),
|
||||
.mem_rdata(mem_rdata),
|
||||
.mem_rstrb(mem_rstrb),
|
||||
.mem_rbusy(mem_rbusy),
|
||||
.mem_wbusy(mem_wbusy),
|
||||
.reset(reset)
|
||||
);
|
||||
|
||||
// Memory address decoding
|
||||
wire ram_sel = (mem_addr[31:12] == 20'h00000); // 0x0000-0x0FFF: RAM
|
||||
wire rom_sel = (mem_addr[31:12] == 20'h00001); // 0x1000-0x1FFF: ROM
|
||||
|
||||
// Memory write logic
|
||||
always @(posedge clk) begin
|
||||
if (ram_sel) begin
|
||||
if (mem_wmask[0]) ram[mem_addr[13:2]][7:0] <= mem_wdata[7:0];
|
||||
if (mem_wmask[1]) ram[mem_addr[13:2]][15:8] <= mem_wdata[15:8];
|
||||
if (mem_wmask[2]) ram[mem_addr[13:2]][23:16] <= mem_wdata[23:16];
|
||||
if (mem_wmask[3]) ram[mem_addr[13:2]][31:24] <= mem_wdata[31:24];
|
||||
end
|
||||
end
|
||||
|
||||
// Memory read logic (combinatorial for simplicity)
|
||||
always @(*) begin
|
||||
if (rom_sel)
|
||||
mem_rdata = rom[mem_addr[9:2]]; // Word-aligned ROM access
|
||||
else if (ram_sel)
|
||||
mem_rdata = ram[mem_addr[13:2]]; // Word-aligned RAM access
|
||||
else
|
||||
mem_rdata = 32'h00000000;
|
||||
end
|
||||
|
||||
`ifdef EXT_INLINE_ASM
|
||||
initial begin
|
||||
rom = '{
|
||||
__asm
|
||||
.arch riscv
|
||||
.org 4096
|
||||
.len 256
|
||||
|
||||
; RISC-V Fibonacci test program
|
||||
; x1 = current fib number
|
||||
; x2 = previous fib number
|
||||
; x3 = temporary
|
||||
; Address 0 in RAM will store results
|
||||
|
||||
start:
|
||||
addi x1, x0, 1 ; x1 = 1 (first fibonacci number)
|
||||
addi x2, x0, 0 ; x2 = 0 (second fibonacci number)
|
||||
|
||||
loop:
|
||||
add x3, x1, x2 ; x3 = x1 + x2 (next fibonacci)
|
||||
addi x2, x1, 0 ; x2 = x1 (shift previous)
|
||||
addi x1, x3, 0 ; x1 = x3 (shift current)
|
||||
|
||||
sw x1, 0(x0) ; Store result to RAM address 0
|
||||
lw x4, 0(x0) ; Load it back to x4
|
||||
|
||||
beq x0, x0, loop ; Loop forever
|
||||
|
||||
__endasm
|
||||
};
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
`endif
|
||||
|
||||
@@ -0,0 +1,146 @@
|
||||
|
||||
`include "hvsync_generator.v"
|
||||
`include "femtorv32.v"
|
||||
|
||||
module frame_buffer_riscv_top(clk, reset, hsync, vsync, hpaddle, vpaddle, rgb);
|
||||
|
||||
input clk, reset;
|
||||
input hpaddle, vpaddle;
|
||||
output hsync, vsync;
|
||||
wire display_on;
|
||||
wire [8:0] hpos;
|
||||
wire [8:0] vpos;
|
||||
output reg [3:0] rgb;
|
||||
|
||||
// Memory: 64KB RAM + 4KB ROM
|
||||
reg [31:0] ram[0:16383]; // RAM (16384 x 32 bits = 64KB)
|
||||
reg [31:0] rom[0:1023]; // ROM (1024 x 32 bits = 4KB)
|
||||
|
||||
// FemtoRV32 CPU interface signals
|
||||
wire [31:0] mem_addr;
|
||||
wire [31:0] mem_wdata;
|
||||
wire [3:0] mem_wmask;
|
||||
reg [31:0] mem_rdata;
|
||||
wire mem_rstrb;
|
||||
reg mem_rbusy;
|
||||
reg mem_wbusy;
|
||||
|
||||
// Instantiate FemtoRV32 CPU
|
||||
FemtoRV32 #(
|
||||
.RESET_ADDR(32'h00010000), // Start execution from ROM area
|
||||
.ADDR_WIDTH(24) // 24-bit internal address bus (16MB addressable)
|
||||
) cpu (
|
||||
.clk(clk),
|
||||
.reset(reset), // FemtoRV32 reset (active high based on code)
|
||||
.mem_addr(mem_addr),
|
||||
.mem_wdata(mem_wdata),
|
||||
.mem_wmask(mem_wmask),
|
||||
.mem_rdata(mem_rdata),
|
||||
.mem_rstrb(mem_rstrb),
|
||||
.mem_rbusy(mem_rbusy),
|
||||
.mem_wbusy(mem_wbusy)
|
||||
);
|
||||
|
||||
// Memory address decoding.
// mem_addr is a 32-bit bus; compare every bit above each region so that the
// RAM and ROM windows are mutually exclusive and do not alias elsewhere in
// the address space. (Testing only mem_addr[15] made ram_sel true for ROM
// addresses such as 0x10000, so a store there would corrupt RAM.)
wire ram_sel = (mem_addr[31:16] == 16'h0000);  // 0x00000-0x0FFFF: RAM (64KB)
wire rom_sel = (mem_addr[31:12] == 20'h00010); // 0x10000-0x10FFF: ROM (4KB)
|
||||
|
||||
// Memory read logic
|
||||
always @(posedge clk) begin
|
||||
if (mem_rstrb) begin
|
||||
mem_rbusy <= 1;
|
||||
if (rom_sel)
|
||||
mem_rdata <= rom[mem_addr[11:2]]; // Word-aligned ROM access
|
||||
else if (ram_sel)
|
||||
mem_rdata <= ram[mem_addr[15:2]]; // Word-aligned RAM access
|
||||
else
|
||||
mem_rdata <= 32'h00000000;
|
||||
end else begin
|
||||
mem_rbusy <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
// Memory write logic (synchronous)
|
||||
always @(posedge clk) begin
|
||||
if (ram_sel && |mem_wmask) begin
|
||||
mem_wbusy <= 1;
|
||||
// Byte-wise write masking
|
||||
if (mem_wmask[0]) ram[mem_addr[15:2]][7:0] <= mem_wdata[7:0];
|
||||
if (mem_wmask[1]) ram[mem_addr[15:2]][15:8] <= mem_wdata[15:8];
|
||||
if (mem_wmask[2]) ram[mem_addr[15:2]][23:16] <= mem_wdata[23:16];
|
||||
if (mem_wmask[3]) ram[mem_addr[15:2]][31:24] <= mem_wdata[31:24];
|
||||
end else begin
|
||||
mem_wbusy <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
// Video sync generator
|
||||
hvsync_generator hvsync_gen(
|
||||
.clk(clk),
|
||||
.reset(0),
|
||||
.hsync(hsync),
|
||||
.vsync(vsync),
|
||||
.display_on(display_on),
|
||||
.hpos(hpos),
|
||||
.vpos(vpos)
|
||||
);
|
||||
|
||||
// Video framebuffer rendering
|
||||
reg [13:0] vindex; // Index into framebuffer
|
||||
reg [31:0] vshift; // Shift register with current word to output
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (display_on) begin
|
||||
// Load next word from RAM every 8 pixels (32 bits = 8 pixels at 4bpp)
|
||||
if (hpos[2:0] == 3'b000) begin
|
||||
vshift <= ram[vindex]; // Read next framebuffer word (vindex restarts at RAM word 0 each frame)
|
||||
vindex <= vindex + 1;
|
||||
end else begin
|
||||
vshift <= vshift >> 4; // Shift next 4-bit pixel
|
||||
end
|
||||
// Decode scanline RAM to RGB output
|
||||
rgb <= vshift[3:0];
|
||||
end else begin
|
||||
rgb <= 0; // Set color to black
|
||||
if (vsync) vindex <= 0; // Reset vindex every frame
|
||||
end
|
||||
end
|
||||
|
||||
// Test program - simple pattern generator
|
||||
`ifdef EXT_INLINE_ASM
|
||||
initial begin
|
||||
rom = '{
|
||||
__asm
|
||||
.arch riscv
|
||||
.org 0x8000
|
||||
.len 0x400
|
||||
|
||||
; RISC-V test program - fill framebuffer with pattern
|
||||
; x1 = loop counter
|
||||
; x2 = RAM address
|
||||
; x3 = pattern value
|
||||
|
||||
start:
      lui   x2, 0x0         ; x2 = 0x00000 (framebuffer start)
      addi  x1, x0, 0       ; x1 = 0 (counter)
      lui   x4, 0x10        ; x4 = 0x10000 (0x10 << 12): first address past the 64KB RAM
|
||||
|
||||
loop:
|
||||
add x3, x1, x0 ; x3 = counter value as pattern
|
||||
sw x3, 0(x2) ; Store pattern to framebuffer
|
||||
addi x2, x2, 4 ; Increment address by 4 bytes
|
||||
addi x1, x1, 1 ; Increment counter
|
||||
blt x2, x4, loop ; Loop if address < end
|
||||
|
||||
; Infinite loop to restart
|
||||
lui x2, 0x2 ; Reset to start
|
||||
addi x1, x1, 1 ; Increment counter
|
||||
jal x0, loop ; Jump back to loop
|
||||
|
||||
__endasm
|
||||
};
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
{
|
||||
"name":"riscv",
|
||||
"width":32,
|
||||
"vars":{
|
||||
"reg":{"bits":5, "toks":["zero","x1","x2","x3","x4","x5","x6","x7","x8","x9","x10","x11","x12","x13","x14","x15"]},
|
||||
"reg":{"bits":5, "toks":["x0","x1","x2","x3","x4","x5","x6","x7","x8","x9","x10","x11","x12","x13","x14","x15"]},
|
||||
"brop":{"bits":3, "toks":["beq","bne","bx2","bx3","blt","bge","bltu","bgeu"]},
|
||||
"imm5":{"bits":5},
|
||||
"imm12":{"bits":12},
|
||||
|
||||
Reference in New Issue
Block a user