From fbcfe3152c1d75e3f721d904912630d0590fdc16 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sat, 4 Jun 2022 09:53:09 +0200 Subject: [PATCH] DMA debug with XiBus NuBus & add alternate Migen implementation of NuBus --- nubus-to-ztex-gateware/blit.c | 28 +- nubus-to-ztex-gateware/do | 3 + nubus-to-ztex-gateware/goblin_accel.py | 16 + nubus-to-ztex-gateware/nubus.py | 1 + nubus-to-ztex-gateware/nubus.v | 4 +- nubus-to-ztex-gateware/nubus_cpld.v | 5 +- nubus-to-ztex-gateware/nubus_full.py | 372 ++++++++++++++++++++ nubus-to-ztex-gateware/nubus_master_tst.py | 83 +++-- nubus-to-ztex-gateware/nubus_sampling.v | 112 ++++++ nubus-to-ztex-gateware/nubus_to_fpga_soc.py | 113 +++--- nubus-to-ztex-gateware/slave_tb.sv | 5 +- 11 files changed, 640 insertions(+), 102 deletions(-) create mode 100644 nubus-to-ztex-gateware/nubus_full.py create mode 100644 nubus-to-ztex-gateware/nubus_sampling.v diff --git a/nubus-to-ztex-gateware/blit.c b/nubus-to-ztex-gateware/blit.c index 7c06b2f..db28a48 100644 --- a/nubus-to-ztex-gateware/blit.c +++ b/nubus-to-ztex-gateware/blit.c @@ -42,25 +42,28 @@ struct control_blitter { #define FUN_BLIT_BIT 0 // hardwired in goblin_accel.py #define FUN_FILL_BIT 1 // hardwired in goblin_accel.py +#define FUN_TEST_BIT 3 // hardwired in goblin_accel.py #define FUN_DONE_BIT 31 #define FUN_BLIT (1<reg_r5_cmd; + // fixme; switching to & 0xFFFF will use zext.h, which isn't included in our Vex ATM switch (cmd & 0xF) { case FUN_BLIT: { bitblit(fbc->reg_bitblt_src_x, fbc->reg_bitblt_src_y, @@ -171,6 +175,20 @@ void from_reset(void) { fbc->reg_width, fbc->reg_height, fbc->reg_fgcolor); } break; +#if 1 + case FUN_TEST: { + u_int32_t val = fbc->reg_chk_val; + u_int32_t* ptr = (u_int32_t*)fbc->reg_chk_adr; + u_int32_t pval = (*ptr); + fbc->reg_chk_val = (val ^ pval); + if (pval == 0x01234567) + fbc->reg_chk_adr = 1; + else if (pval == 0x67452301) + fbc->reg_chk_adr = 0; + else + fbc->reg_chk_adr = -1; + } break; +#endif default: break; } @@ -180,7 +198,7 @@ void from_reset(void) { // make sure we have nothing left in the cache flush_cache(); - fbc->reg_r5_cmd = 0xFFFFFFFF; //FUN_DONE; + fbc->reg_r5_cmd = FUN_DONE; done: /* wait for reset */ diff --git a/nubus-to-ztex-gateware/do b/nubus-to-ztex-gateware/do index a20eb15..dcb2cd0 100644 --- a/nubus-to-ztex-gateware/do +++ b/nubus-to-ztex-gateware/do @@ -3,6 +3,9 @@ source /opt/Xilinx/Vivado/2020.1/settings64.sh export LD_LIBRARY_PATH=/opt/Xilinx/Vivado/2020.1/lib/lnx64.o/SuSE python3 nubus_to_fpga_soc.py --build --csr-csv csr.csv --csr-json csr.json --variant=ztex2.13a --version=V1.0 --sys-clk-freq 100e6 --goblin --goblin-res 1920x1080@60Hz --hdmi + +#python3 nubus_to_fpga_soc.py --csr-csv csr.csv --csr-json csr.json --variant=ztex2.13a --version=V1.0 --sys-clk-freq 100e6 + ) 2>&1 | tee build_V1_0.log # --goblin --goblin-res 1280x1024@60Hz # --hdmi diff --git a/nubus-to-ztex-gateware/goblin_accel.py b/nubus-to-ztex-gateware/goblin_accel.py index 5d7fbe6..1106a93 100644 --- a/nubus-to-ztex-gateware/goblin_accel.py +++ b/nubus-to-ztex-gateware/goblin_accel.py @@ -26,14 +26,18 @@ class GoblinAccel(Module): # AutoCSR ? reg_bitblt_src_y = Signal(COORD_BITS) # 9 reg_bitblt_dst_x = Signal(COORD_BITS) # 10 reg_bitblt_dst_y = Signal(COORD_BITS) # 11 + reg_chk_adr = Signal(32) # 12 + reg_chk_val = Signal(32) # 13 # do-some-work flags do_blit = Signal() do_fill = Signal() + do_test = Signal() # cmd register reg_cmd DO_BLIT_BIT = 0 DO_FILL_BIT = 1 + DO_TEST_BIT = 3 # global status register reg_status WORK_IN_PROGRESS_BIT = 0 @@ -51,6 +55,7 @@ class GoblinAccel(Module): # AutoCSR ? 1: [ NextValue(reg_cmd, bus.dat_w), NextValue(do_blit, bus.dat_w[DO_BLIT_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]), NextValue(do_fill, bus.dat_w[DO_FILL_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]), + NextValue(do_test, bus.dat_w[DO_TEST_BIT] & ~reg_status[WORK_IN_PROGRESS_BIT]), ], 2: [ NextValue(reg_r5_cmd, bus.dat_w) ], # 3 @@ -62,6 +67,8 @@ class GoblinAccel(Module): # AutoCSR ? 9: [ NextValue(reg_bitblt_src_y, bus.dat_w) ], 10: [ NextValue(reg_bitblt_dst_x, bus.dat_w) ], 11: [ NextValue(reg_bitblt_dst_y, bus.dat_w) ], + 12: [ NextValue(reg_chk_adr, bus.dat_w) ], + 13: [ NextValue(reg_chk_val, bus.dat_w) ], }), NextValue(bus.ack, 1), ).Elif(bus.cyc & bus.stb & ~bus.we & ~bus.ack, #read @@ -79,6 +86,8 @@ class GoblinAccel(Module): # AutoCSR ? 9: [ NextValue(bus.dat_r, reg_bitblt_src_y) ], 10: [ NextValue(bus.dat_r, reg_bitblt_dst_x) ], 11: [ NextValue(bus.dat_r, reg_bitblt_dst_y) ], + 12: [ NextValue(bus.dat_r, reg_chk_adr) ], + 13: [ NextValue(bus.dat_r, reg_chk_val) ], }), NextValue(bus.ack, 1), ).Else( @@ -90,6 +99,7 @@ class GoblinAccel(Module): # AutoCSR ? FUN_DONE_BIT = 31 FUN_BLIT_BIT = 0 FUN_FILL_BIT = 1 + FUN_TEST_BIT = 3 # to hold the Vex in reset local_reset = Signal(reset = 1) @@ -111,6 +121,12 @@ class GoblinAccel(Module): # AutoCSR ? reg_status[WORK_IN_PROGRESS_BIT].eq(1), local_reset.eq(0), #timeout.eq(timeout_rst), + ).Elif(do_test & ~reg_status[WORK_IN_PROGRESS_BIT], + do_test.eq(0), + reg_r5_cmd[FUN_TEST_BIT].eq(1), + reg_status[WORK_IN_PROGRESS_BIT].eq(1), + local_reset.eq(0), + #timeout.eq(timeout_rst), ) ] diff --git a/nubus-to-ztex-gateware/nubus.py b/nubus-to-ztex-gateware/nubus.py index a1d8aa2..3ad6ffe 100644 --- a/nubus-to-ztex-gateware/nubus.py +++ b/nubus-to-ztex-gateware/nubus.py @@ -109,6 +109,7 @@ class NuBus(Module): def add_sources(self, platform): platform.add_source("nubus.v", "verilog") + # XiBus is from my github, branch 'more_fixes' platform.add_source("/home/dolbeau/XiBus/nubus.svh", "verilog") #platform.add_source("/home/dolbeau/XiBus/nubus_arbiter.v", "verilog") # in the CPLD platform.add_source("/home/dolbeau/XiBus/nubus_cpubus.v", "verilog") diff --git a/nubus-to-ztex-gateware/nubus.v b/nubus-to-ztex-gateware/nubus.v index 85def86..4c908cd 100644 --- a/nubus-to-ztex-gateware/nubus.v +++ b/nubus-to-ztex-gateware/nubus.v @@ -54,7 +54,7 @@ module nubus // output nub_nmrqn, // Non-Master Request, handled in the Litex code /* those are used but connected only to the CPLD */ - /* we deal with the CPLD via 'arbcy_n' and 'grant' + /* we deal with the CPLD via 'arbcy_n' and 'grant' */ // inout [ 3:0] nub_arbn, // Arbitration /* *** CPLD <-> FPGA signals, not in NuBus */ @@ -275,7 +275,7 @@ module nubus // ========================================================================== assign cpu_rdata = ~nub_adn; - assign cpu_ready = ~nub_ackn & nub_startn; + assign cpu_ready = ~nub_ackn & nub_startn & ~mst_ownern; // if mst_ownern is inactive (high), then we're seeing the ACK from the previous slave transaction that we were waiting on nubus_cpubus UCPUBus ( diff --git a/nubus-to-ztex-gateware/nubus_cpld.v b/nubus-to-ztex-gateware/nubus_cpld.v index 0c554a9..83b3ad3 100644 --- a/nubus-to-ztex-gateware/nubus_cpld.v +++ b/nubus-to-ztex-gateware/nubus_cpld.v @@ -12,7 +12,7 @@ module nubus_cpld input clk2x_n_5v, // clk from NuBus90 // Spares - input fpga_to_cpld_clk, // unused (extra line from FPGA to CPLD, pin is a clk input) + input fpga_to_cpld_clk, // rqstoen (extra line from FPGA to CPLD, pin is a clk input) input fpga_to_cpld_signal, // unused (extra line from FPGA to CPLD) inout fpga_to_cpld_signal_2, // unused (extra line from FPGA to CPLD) @@ -82,7 +82,8 @@ module nubus_cpld // rqst_o_n is always driven (the 74lvt125 wired as open collector will convert 1 to Z) and is active low assign rqst_o_n = nubus_oe ? 1 : (~fpga_to_cpld_signal ? rqst_n_3v3 : 1); // master out assign rqst_n_3v3 = nubus_oe ? 'bZ : ( fpga_to_cpld_signal ? rqst_n_5v : 'bZ); // master in - + //assign rqst_n_3v3 = rqst_n_5v; // master in, always on + //assign ack_o_5v = nubus_oe ? 'bZ : ((nubus_master_dir ^ ~tmoen) ? ack_n_3v3 : 'bZ); // slave out/in assign ack_o_n = nubus_oe ? 1 : (( ~tmoen) ? ack_n_3v3 : 1); // slave out/in assign ack_oe_n = nubus_oe ? 1 : (( ~tmoen) ? 0 : 1); // slave out/in diff --git a/nubus-to-ztex-gateware/nubus_full.py b/nubus-to-ztex-gateware/nubus_full.py new file mode 100644 index 0000000..82566c9 --- /dev/null +++ b/nubus-to-ztex-gateware/nubus_full.py @@ -0,0 +1,372 @@ +from migen import * +from migen.genlib.fifo import * +from migen.genlib.cdc import * +from migen.fhdl.specials import Tristate + +import litex +from litex.soc.interconnect import wishbone + +class NuBus(Module): + def __init__(self, platform, wb_read, wb_write, wb_dma, cd_nubus="nubus", cd_nubus90="nubus90"): + + self.add_sources(platform) + + #led0 = platform.request("user_led", 0) + #led1 = platform.request("user_led", 1) + + # Signals for tri-stated nubus access + # slave + tmo_oe = Signal() # output enable + tm0_i_n = Signal() + tm0_o_n = Signal() + tm1_i_n = Signal() + tm1_o_n = Signal() + ack_i_n = Signal() + ack_o_n = Signal() + + ad_oe = Signal() + ad_i_n = Signal(32) + ad_o_n = Signal(32) + + id_i_n = Signal(4) + + start_i_n = Signal() + start_o_n = Signal() # master via master_oe + + # master + rqst_oe = Signal() + rqst_i_n = Signal() + rqst_o_n = Signal() + + # sampled signals, exposing the value of the register acquired on the falling edge + # they can change every cycle *on falling edge* + # slave + sampled_tm0 = Signal() # high is byte (which byte is in ad0/ad1); low is halfword/word/block depending on ad0/ad1 + sampled_tm1 = Signal() # high is write + sampled_start = Signal() + sampled_ack = Signal() + sampled_ad = Signal(32) + + # master + sampled_rqst = Signal() + + # address rewriting + # can change every cycle *on falling edge* + processed_ad = Signal(32) + self.comb += [ + processed_ad[0:23].eq(sampled_ad[0:23]), + If(~sampled_ad[23], # first 8 MiB of slot space: remap to last 8 Mib of SDRAM + processed_ad[23:32].eq(Cat(Signal(1, reset=1), Signal(8, reset = 0x8f))), # 0x8f8... + ).Else( # second 8 MiB: direct access + processed_ad[23:32].eq(Cat(sampled_ad[23], Signal(8, reset = 0xf0)))), # 24 bits, a.k.a 22 bits of words + ] + + # decoded signals, exposing decoded results from the sampled signals + # they can change every cycle *on falling edge* + # from sampling (fixme?) + decoded_sel = Signal(4) + decoded_block = Signal() + decoded_busy = Signal() + # locally evaluated + decoded_myslot = Signal() + self.comb += [ + decoded_myslot.eq( + (sampled_ad[28:32] == 0xF) & + (sampled_ad[27] == ~id_i_n[3]) & + (sampled_ad[26] == ~id_i_n[2]) & + (sampled_ad[25] == ~id_i_n[1]) & + (sampled_ad[24] == ~id_i_n[0])), + #led0.eq(decoded_block), + ] + + # current value, registered from the sampled/processed/decoded signals + # change is controlled by the FSM + current_adr = Signal(32) + current_tm0 = Signal() + current_tm1 = Signal() + current_sel = Signal(4) + current_block = Signal() + current_data = Signal(32) + + # write FIFO to speed up bus turnaround on NuBus side + write_fifo_layout = [ + ("adr", 32), + ("data", 32), + ("sel", 4), + ] + self.submodules.write_fifo = write_fifo = ClockDomainsRenamer({"read": "sys", "write": "nubus"})(AsyncFIFOBuffered(width=layout_len(write_fifo_layout), depth=8)) + write_fifo_dout = Record(write_fifo_layout) + self.comb += write_fifo_dout.raw_bits().eq(write_fifo.dout) + write_fifo_din = Record(write_fifo_layout) + self.comb += write_fifo.din.eq(write_fifo_din.raw_bits()) + + self.specials += Instance("nubus_sampling", + i_nub_clkn = ClockSignal(cd_nubus), + i_nub_resetn = ~ResetSignal(cd_nubus), + i_nub_tm0n = tm0_i_n, + i_nub_tm1n = tm1_i_n, + i_nub_startn = start_i_n, + i_nub_rqstn = rqst_i_n, + i_nub_ackn = ack_i_n, + i_nub_adn = ad_i_n, + + o_tm0 = sampled_tm0, + o_tm1 = sampled_tm1, + o_start = sampled_start, + o_rqst = sampled_rqst, + o_ack = sampled_ack, + o_ad = sampled_ad, + + o_sel = decoded_sel, + o_block = decoded_block, + o_busy = decoded_busy, + ) + + self.submodules.slave_fsm = slave_fsm = ClockDomainsRenamer(cd_nubus)(FSM(reset_state="Reset")) + slave_fsm.act("Reset", + NextState("Idle") + ) + slave_fsm.act("Idle", + If(decoded_myslot & sampled_start & ~sampled_ack & ~sampled_tm1,# & ~decoded_block, # regular read (we always send back 32 bits, so don't worry about byte/word) + NextValue(current_adr, processed_ad), + #NextValue(current_tm0, sampled_tm0), + #NextValue(current_tm1, sampled_tm1), + #NextValue(current_sel, decoded_sel), + #NextValue(current_block, decoded_block), + #If(decoded_block, + # NextValue(decoded_block_memory, 1),), + NextState("WaitWBRead"), + ).Elif(decoded_myslot & sampled_start & ~sampled_ack & sampled_tm1,# & ~decoded_block, # regular write + NextValue(current_adr, processed_ad), + #NextValue(current_tm0, sampled_tm0), + #NextValue(current_tm1, sampled_tm1), + NextValue(current_sel, decoded_sel), + #NextValue(current_block, decoded_block), + #If(decoded_block, + # NextValue(decoded_block_memory, 1),), + #NextState("GetNubusWriteData"), + NextState("NubusWriteDataToFIFO"), + ) + ) + slave_fsm.act("WaitWBRead", + wb_read.cyc.eq(1), + wb_read.stb.eq(1), + wb_read.we.eq(0), + wb_read.sel.eq(0xf), + wb_read.adr.eq(current_adr[2:32]), + tmo_oe.eq(1), + tm0_o_n.eq(1), + tm1_o_n.eq(1), + ack_o_n.eq(1), + If(wb_read.ack, + ad_oe.eq(1), + ad_o_n.eq(~wb_read.dat_r), + tm0_o_n.eq(0), + tm1_o_n.eq(0), + ack_o_n.eq(0), + NextState("Idle"), + ) + ) + #slave_fsm.act("GetNubusWriteData", + # NextValue(current_data, sampled_ad), + # wb_read.cyc.eq(1), + # wb_read.stb.eq(1), + # wb_read.we.eq(1), + # wb_read.sel.eq(current_sel), + # wb_read.adr.eq(current_adr[2:32]), + # wb_read.dat_w.eq(sampled_ad), + # If(wb_read.ack, + # tmo_oe.eq(1), + # tm0_o_n.eq(0), + # tm1_o_n.eq(0), + # ack_o_n.eq(0), + # NextState("Idle"), + # ).Else( + # NextState("WaitWBWrite"), + # ) + #) + #slave_fsm.act("WaitWBWrite", + # wb_read.cyc.eq(1), + # wb_read.stb.eq(1), + # wb_read.we.eq(1), + # wb_read.sel.eq(current_sel), + # wb_read.adr.eq(current_adr[2:32]), + # wb_read.dat_w.eq(current_data), + # If(wb_read.ack, + # tmo_oe.eq(1), + # tm0_o_n.eq(0), + # tm1_o_n.eq(0), + # ack_o_n.eq(0), + # NextState("Idle"), + # ) + #) + slave_fsm.act("NubusWriteDataToFIFO", + write_fifo.we.eq(1), + tmo_oe.eq(1), + tm0_o_n.eq(0), + tm1_o_n.eq(0), + ack_o_n.eq(0), + NextState("Idle"), + ) + + # connect the write FIFO inputs + self.comb += [ write_fifo_din.adr.eq(current_adr), # recorded + write_fifo_din.data.eq(sampled_ad), # we do it live + write_fifo_din.sel.eq(current_sel), # recorded + ] + # deal with emptying the Write FIFO to the write WB + self.comb += [ wb_write.cyc.eq(write_fifo.readable), + wb_write.stb.eq(write_fifo.readable), + wb_write.we.eq(1), + wb_write.adr.eq(write_fifo_dout.adr[2:32]), + wb_write.dat_w.eq(write_fifo_dout.data), + wb_write.sel.eq(write_fifo_dout.sel), + write_fifo.re.eq(wb_write.ack), + ] + + owning_bus = Signal(reset = 0) # fixme ; theoretically one can bypass arbitration when owning the bus + + start_arbitration = Signal() + grant = Signal() + master_oe = Signal() + + nubus_sync = getattr(self.sync, cd_nubus) + nubus_sync += [ + If(sampled_rqst & ~start_arbitration, + owning_bus.eq(0), + ) + ] + + self.submodules.dma_fsm = dma_fsm = ClockDomainsRenamer(cd_nubus)(FSM(reset_state="Reset")) + dma_fsm.act("Reset", + NextState("Idle") + ) + dma_fsm.act("Idle", + If(wb_dma.cyc & wb_dma.stb & ~sampled_rqst, # we need the bus and it's not being requested + If(owning_bus, # we own the bus, skip arbitration + NextState("AdrCycle"), + ).Else( # go for arbitration + NextState("Arbitration"), + ), + ) + ) + dma_fsm.act("Arbitration", + start_arbitration.eq(1), + rqst_oe.eq(1), + rqst_o_n.eq(0), + NextState("WaitForGrant"), + ) + dma_fsm.act("WaitForGrant", + start_arbitration.eq(1), + rqst_oe.eq(1), + rqst_o_n.eq(0), + If(grant & ~decoded_busy, # I'm now 'owner' + NextValue(owning_bus, 1), + NextState("AdrCycle"), + ) + ) + dma_fsm.act("AdrCycle", + start_arbitration.eq(0), + master_oe.eq(1), # for start + tmo_oe.eq(1), # for tm0, tm1, ack + ad_oe.eq(1), # for write address + start_o_n.eq(0), + tm0_o_n.eq(~((wb_dma.sel == 0x1) | (wb_dma.sel == 0x2) | (wb_dma.sel == 0x4) | (wb_dma.sel == 0x8))), # byte only + tm1_o_n.eq(~wb_dma.we), + ad_o_n[0].eq(~((wb_dma.sel == 0x2) | (wb_dma.sel == 0x3) | (wb_dma.sel == 0x8) | (wb_dma.sel == 0xc))), # odd bytes, both half-words + ad_o_n[1].eq(~((wb_dma.sel == 0x4) | (wb_dma.sel == 0x8) | (wb_dma.sel == 0xc))), # upper bytes and half-word + ad_o_n[2:32].eq(~wb_dma.adr), + ack_o_n.eq(1), + If(wb_dma.we, + NextState("DatCycle"), + ).Else( + NextState("ReadWaitForAck"), + ) + ) + dma_fsm.act("DatCycle", + master_oe.eq(1), # for start + ad_oe.eq(1), # for write data + start_o_n.eq(1), # start finished, but still need to be driven + ad_o_n.eq(~wb_dma.dat_w), + If(sampled_ack, + wb_dma.ack.eq(1), + # fixme: check status ??? (tm0 and tm1 should be active for no-error) + NextState("FinishCycle"), + ) + ) + dma_fsm.act("FinishCycle", + master_oe.eq(1), # for start + start_o_n.eq(1), # start finished, but still need to be driven + tmo_oe.eq(1), # for tm0, tm1, ack, need to be driven to inactive + tm0_o_n.eq(1), + tm1_o_n.eq(1), + ack_o_n.eq(1), + NextState("Idle"), + ) + dma_fsm.act("ReadWaitForAck", + master_oe.eq(1), # for start + start_o_n.eq(1), # start finished, but still need to be driven + wb_dma.dat_r.eq(sampled_ad), + If(sampled_ack, + wb_dma.ack.eq(1), + # fixme: check status ??? (tm0 and tm1 should be active for no-error) + NextState("FinishCycle"), + ) + ) + + # stuff at this end so we don't use the signals inadvertantly + + # real NuBus signals + nub_tm0n = platform.request("tm0_3v3_n") + nub_tm1n = platform.request("tm1_3v3_n") + nub_startn = platform.request("start_3v3_n") + nub_ackn = platform.request("ack_3v3_n") + nub_adn = platform.request("ad_3v3_n") + nub_idn = platform.request("id_3v3_n") + + # Tri-state + self.specials += Tristate(nub_tm0n, tm0_o_n, tmo_oe, tm0_i_n) + self.specials += Tristate(nub_tm1n, tm1_o_n, tmo_oe, tm1_i_n) + self.specials += Tristate(nub_ackn, ack_o_n, tmo_oe, ack_i_n) + self.specials += Tristate(nub_adn, ad_o_n, ad_oe, ad_i_n) + self.specials += Tristate(nub_startn, start_o_n, master_oe, start_i_n) + self.comb += [ + id_i_n.eq(nub_idn), + ] + + # NubusFPGA-only signals + nf_tmoen = platform.request("tmoen") + nf_nubus_ad_dir = platform.request("nubus_ad_dir") + + self.comb += [ + nf_tmoen.eq(~tmo_oe), + nf_nubus_ad_dir.eq(~ad_oe), + ] + + # real Nubus signal, for master + nub_rqstn = platform.request("rqst_3v3_n") + + # Tri-state + self.specials += Tristate(nub_rqstn, rqst_o_n, rqst_oe, rqst_i_n) + + # NubusFPGA-only signals, for master + nub_arbcy_n = platform.request("arbcy_n") + nf_grant = platform.request("grant") + nf_nubus_master_dir = platform.request("nubus_master_dir") + nf_fpga_to_cpld_signal = platform.request("fpga_to_cpld_signal") + + # NuBus90 signals, , for completeness + nub_clk2xn = ClockSignal(cd_nubus90) + nub_tm2n = platform.request("tm2_3v3_n") + + self.comb += [ + nf_nubus_master_dir.eq(master_oe), + nub_arbcy_n.eq(~start_arbitration), + grant.eq(nf_grant), + nf_fpga_to_cpld_signal.eq(~rqst_oe), + ] + + + def add_sources(self, platform): + # sampling of data on falling edge of clock, done in verilog + platform.add_source("nubus_sampling.v", "verilog") diff --git a/nubus-to-ztex-gateware/nubus_master_tst.py b/nubus-to-ztex-gateware/nubus_master_tst.py index 0a4f3ce..998f347 100644 --- a/nubus-to-ztex-gateware/nubus_master_tst.py +++ b/nubus-to-ztex-gateware/nubus_master_tst.py @@ -5,27 +5,35 @@ import litex from litex.soc.interconnect import wishbone class PingMaster(Module): - def __init__(self, platform): + def __init__(self, nubus, platform): self.bus_slv = bus_slv = wishbone.Interface() self.bus_mst = bus_mst = wishbone.Interface() - led0 = platform.request("user_led", 0) - led1 = platform.request("user_led", 1) + #led0 = platform.request("user_led", 0) + #led1 = platform.request("user_led", 1) valu_reg = Signal(32) - addr_reg = Signal(32) + waddr_reg = Signal(32) + raddr_reg = Signal(32) writ_del = Signal(6) + read_del = Signal(6) do_write = Signal() - #addr_reg_rev = Signal(32) - #self.comb += [ addr_reg_rev[ 0: 8].eq(addr_reg[24:32]), - # addr_reg_rev[ 8:16].eq(addr_reg[16:24]), - # addr_reg_rev[16:24].eq(addr_reg[ 8:16]), - # addr_reg_rev[24:32].eq(addr_reg[ 0: 8]), ] + do_read = Signal() + #waddr_reg_rev = Signal(32) + #self.comb += [ waddr_reg_rev[ 0: 8].eq(waddr_reg[24:32]), + # waddr_reg_rev[ 8:16].eq(waddr_reg[16:24]), + # waddr_reg_rev[16:24].eq(waddr_reg[ 8:16]), + # waddr_reg_rev[24:32].eq(waddr_reg[ 0: 8]), ] self.sync += [ If(writ_del != 0, writ_del.eq(writ_del - 1),), If(writ_del == 1, do_write.eq(1), + ), + If(read_del != 0, + read_del.eq(read_del - 1),), + If(read_del == 1, + do_read.eq(1), ) ] @@ -36,16 +44,19 @@ class PingMaster(Module): wishbone_fsm.act("Idle", If(bus_slv.cyc & bus_slv.stb & bus_slv.we & ~bus_slv.ack, #write # FIXME: should check for prefix? - Case(bus_slv.adr[0:1], { + Case(bus_slv.adr[0:2], { 0x0: [ NextValue(valu_reg, bus_slv.dat_w[0:32]), ], - 0x1: [ NextValue(addr_reg, bus_slv.dat_w[0:32]), - NextValue(writ_del, 63), ], + 0x1: [ NextValue(waddr_reg, bus_slv.dat_w[0:32]), + NextValue(writ_del, 3), ], + 0x2: [ NextValue(raddr_reg, bus_slv.dat_w[0:32]), + NextValue(read_del, 3), ], }), NextValue(bus_slv.ack, 1), ).Elif(bus_slv.cyc & bus_slv.stb & ~bus_slv.we & ~bus_slv.ack, #read - Case(bus_slv.adr[0:1], { + Case(bus_slv.adr[0:2], { 0x0: [ NextValue(bus_slv.dat_r, valu_reg), ], - 0x1: [ NextValue(bus_slv.dat_r, addr_reg), ], + 0x1: [ NextValue(bus_slv.dat_r, waddr_reg), ], + 0x2: [ NextValue(bus_slv.dat_r, raddr_reg), ], }), NextValue(bus_slv.ack, 1), ).Else( @@ -57,19 +68,51 @@ class PingMaster(Module): writer_fsm.act("Reset", NextState("Idle"),) writer_fsm.act("Idle", - If(do_write, + If(do_write, # & ~nubus.slave_in_use, NextValue(do_write, 0), - NextState("Write"),),) + bus_mst.cyc.eq(1), + bus_mst.stb.eq(1), + bus_mst.we.eq(1), + bus_mst.dat_w.eq(valu_reg), + bus_mst.adr.eq(waddr_reg[2:32]), + bus_mst.sel.eq(0xf), + If(bus_mst.ack, + NextState("Idle") + ).Else( + NextState("Write") + ) + ).Elif(do_read, + NextValue(do_read, 0), + bus_mst.cyc.eq(1), + bus_mst.stb.eq(1), + bus_mst.we.eq(0), + bus_mst.adr.eq(raddr_reg[2:32]), + bus_mst.sel.eq(0xf), + NextState("Read"), + ) + ) writer_fsm.act("Write", bus_mst.cyc.eq(1), bus_mst.stb.eq(1), bus_mst.we.eq(1), bus_mst.dat_w.eq(valu_reg), - bus_mst.adr.eq(addr_reg[2:32]), + bus_mst.adr.eq(waddr_reg[2:32]), bus_mst.sel.eq(0xf), If(bus_mst.ack, - NextState("Idle")), + NextState("Idle") + ), + ) + writer_fsm.act("Read", + bus_mst.cyc.eq(1), + bus_mst.stb.eq(1), + bus_mst.we.eq(0), + bus_mst.adr.eq(raddr_reg[2:32]), + bus_mst.sel.eq(0xf), + If(bus_mst.ack, + NextValue(valu_reg, bus_mst.dat_r), + NextState("Idle") + ), ) - self.comb += [ led0.eq(bus_mst.cyc), - led1.eq(writ_del != 0), ] + #self.comb += [ led0.eq(bus_mst.cyc), + # led1.eq(writ_del != 0), ] diff --git a/nubus-to-ztex-gateware/nubus_sampling.v b/nubus-to-ztex-gateware/nubus_sampling.v new file mode 100644 index 0000000..6928b76 --- /dev/null +++ b/nubus-to-ztex-gateware/nubus_sampling.v @@ -0,0 +1,112 @@ +/* + * NuBus sampling + * + * Romain Dolbeau for the NuBusFPGA + * Copyright (c) 2021022 + */ + +/* This module is running on the FPGA */ + +module nubus_sampling + ( + /* *** NuBus signals *** */ + /* those are connected to the FPGA */ + /* connected via the CPLD */ + input nub_clkn, // Clock (rising is driving edge, faling is sampling) + input nub_resetn, // Reset + //input [ 3:0] nub_idn, // Slot Identification + input nub_tm0n, // Transfer Mode + input nub_tm1n, // Transfer Mode + input nub_startn, // Start + input nub_rqstn, // Request + input nub_ackn, // Acknowledge + + // connected via the CPLD but NuBus90 (unimplemented) + //input nub_clk2xn, + //inout nub_tm2n, + + /* connected via the 74LVT245 */ + input [31:0] nub_adn, // Address/Data + + /* those are not used, and not even connected in the board */ + // inout nub_pfwn, // Power Fail Warning + // inout nub_spn, // System Parity + // inout nub_spvn, // System Parity Valid + + /* those ared used but handled in directly in the Litex code */ + // output nub_nmrqn, // Non-Master Request, handled in the Litex code + + /* those are used but connected only to the CPLD */ + /* we deal with the CPLD via 'arbcy_n' and 'grant' */ + // inout [ 3:0] nub_arbn, // Arbitration + + output tm0, + output tm1, + output start, + output rqst, + output ack, + output [31:0] ad, + + output [3:0] sel, + output block, + output busy + ); + + reg reg_tm0n, reg_tm1n; + reg reg_startn; + reg reg_rqstn; + reg reg_ackn; + reg [31:0] reg_adn; + reg reg_busy; + + + always @(negedge nub_clkn) begin: proc_sampling + if (~nub_resetn) begin + reg_tm0n <= 1; + reg_tm1n <= 1; + reg_startn <= 1; + reg_rqstn <= 1; + reg_ackn <= 1; + reg_adn <= 0; + reg_busy <= 0; + end else begin + reg_tm0n <= nub_tm0n; + reg_tm1n <= nub_tm1n; + reg_startn <= nub_startn; + reg_rqstn <= nub_rqstn; + reg_ackn <= nub_ackn; + reg_adn <= nub_adn; + reg_busy <= ~reg_busy & nub_ackn & ~nub_startn /* beginning of transaction */ + | reg_busy & nub_ackn & nub_resetn; /* hold during cycle */ + end + end + + assign tm0 = ~reg_tm0n; + assign tm1 = ~reg_tm1n; + assign start = ~reg_startn; + assign rqst = ~reg_rqstn; + assign ack = ~reg_ackn; + assign ad = ~reg_adn; + assign busy = reg_busy; + + // write selector for Wishbone + assign sel[3] = ~reg_tm1n & ~reg_adn[1] & ~reg_adn[0] & ~reg_tm0n /* Byte 3 */ + | ~reg_tm1n & ~reg_adn[1] & ~reg_adn[0] & reg_tm0n /* Half 1 */ + | ~reg_tm1n & reg_adn[1] & reg_adn[0] & reg_tm0n /* Word */ + ; + assign sel[2] = ~reg_tm1n & ~reg_adn[1] & reg_adn[0] & ~reg_tm0n /* Byte 2 */ + | ~reg_tm1n & ~reg_adn[1] & ~reg_adn[0] & reg_tm0n /* Half 1 */ + | ~reg_tm1n & reg_adn[1] & reg_adn[0] & reg_tm0n /* Word */ + ; + assign sel[1] = ~reg_tm1n & reg_adn[1] & ~reg_adn[0] & ~reg_tm0n /* Byte 1 */ + | ~reg_tm1n & reg_adn[1] & ~reg_adn[0] & reg_tm0n /* Half 0 */ + | ~reg_tm1n & reg_adn[1] & reg_adn[0] & reg_tm0n /* Word */ + ; + assign sel[0] = ~reg_tm1n & reg_adn[1] & reg_adn[0] & ~reg_tm0n /* Byte 0 */ + | ~reg_tm1n & reg_adn[1] & ~reg_adn[0] & reg_tm0n /* Half 0 */ + | ~reg_tm1n & reg_adn[1] & reg_adn[0] & reg_tm0n /* Word */ + ; + + assign block = ~reg_adn[1] & reg_adn[0] & reg_tm0n; // 1x block write or 1x block read + +endmodule diff --git a/nubus-to-ztex-gateware/nubus_to_fpga_soc.py b/nubus-to-ztex-gateware/nubus_to_fpga_soc.py index 2c3019a..79c1891 100644 --- a/nubus-to-ztex-gateware/nubus_to_fpga_soc.py +++ b/nubus-to-ztex-gateware/nubus_to_fpga_soc.py @@ -17,6 +17,7 @@ import ztex213_nubus import nubus_to_fpga_export import nubus +import nubus_full from litedram.modules import MT41J128M16 from litedram.phy import s7ddrphy @@ -39,56 +40,6 @@ from nubus_memfifo_wb import NuBus2WishboneFIFO from nubus_cpu_wb import Wishbone2NuBus # CRG ---------------------------------------------------------------------------------------------- -class _CRG_MINI_SIM(Module): - def __init__(self, platform, sys_clk_freq, - goblin=False, - hdmi=False, - pix_clk=0): - self.clock_domains.cd_sys = ClockDomain() - self.clock_domains.cd_native = ClockDomain(reset_less=True) # 48MHz native, non-reset'ed (for power-on long delay, never reset, we don't want the delay after a warm reset) - self.clock_domains.cd_nubus = ClockDomain() # 10 MHz NuBus, reset'ed by NuBus, native NuBus clock domain (25% duty cycle) - self.clock_domains.cd_nubus90 = ClockDomain() # 20 MHz NuBus90, reset'ed by NuBus, native NuBus90 clock domain (25% duty cycle) - - # # # - clk48 = platform.request("clk48") - ###### explanations from betrusted-io/betrusted-soc/betrusted_soc.py - # Note: below feature cannot be used because Litex appends this *after* platform commands! This causes the generated - # clock derived constraints immediately below to fail, because .xdc file is parsed in-order, and the main clock needs - # to be created before the derived clocks. Instead, we use the line afterwards. - platform.add_platform_command("create_clock -name clk48 -period 20.8333 [get_nets clk48]") - # The above constraint must strictly proceed the below create_generated_clock constraints in the .XDC file - # This allows PLLs/MMCMEs to be placed anywhere and reference the input clock - self.clk48_bufg = Signal() - self.specials += Instance("BUFG", i_I=clk48, o_O=self.clk48_bufg) - self.comb += self.cd_native.clk.eq(self.clk48_bufg) - #self.cd_native.clk = clk48 - - clk_nubus = platform.request("clk_3v3_n") - if (clk_nubus is None): - print(" ***** ERROR ***** Can't find the NuBus Clock !!!!\n"); - assert(false) - self.cd_nubus.clk = clk_nubus - rst_nubus_n = platform.request("reset_3v3_n") - self.comb += self.cd_nubus.rst.eq(~rst_nubus_n) - platform.add_platform_command("create_clock -name nubus_clk -period 100.0 -waveform {{0.0 75.0}} [get_ports clk_3v3_n]") - - clk2x_nubus = platform.request("clk2x_3v3_n") - if (clk2x_nubus is None): - print(" ***** ERROR ***** Can't find the NuBus90 Clock !!!!\n"); - assert(false) - self.cd_nubus90.clk = clk2x_nubus - self.comb += self.cd_nubus90.rst.eq(~rst_nubus_n) - platform.add_platform_command("create_clock -name nubus90_clk -period 50.0 -waveform {{0.0 37.5}} [get_ports clk2x_3v3_n]") - - num_adv = 0 - num_clk = 0 - - #platform.add_platform_command("create_clock -name sysclk -period 20.8333 [get_nets clk48]") - #self.sys_bufg = Signal() - #self.specials += Instance("BUFG", i_I=clk48, o_O=self.sys_bufg) - #self.comb += self.cd_native.clk.eq(self.sys_bufg) - - class _CRG(Module): def __init__(self, platform, sys_clk_freq, goblin=False, @@ -311,10 +262,11 @@ class NuBusFPGA(SoCCore): avail_sdram = 256 * 1024 * 1024 self.add_ram("ram", origin=0x8f800000, size=2**16, mode="rw") - #self.submodules.leds = ClockDomainsRenamer("nubus")(LedChaser( - # pads = platform.request_all("user_led"), - # sys_clk_freq = 10e6)) - #self.add_csr("leds") + if (not notsimul): # otherwise we have no CSRs and litex doesn't like that + self.submodules.leds = ClockDomainsRenamer("nubus")(LedChaser( + pads = platform.request_all("user_led"), + sys_clk_freq = 10e6)) + self.add_csr("leds") base_fb = self.wb_mem_map["main_ram"] + avail_sdram - 1048576 # placeholder if (goblin): @@ -346,20 +298,36 @@ class NuBusFPGA(SoCCore): # Interface NuBus to wishbone # we need to cross clock domains - wishbone_master_sys = wishbone.Interface(data_width=self.bus.data_width) - self.submodules.wishbone_master_nubus = WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_master_sys, cd_master="nubus", cd_slave="sys") - self.bus.add_master(name="NuBusBridgeToWishbone", master=wishbone_master_sys) - - self.submodules.nubus = nubus.NuBus(platform=platform, cd_nubus="nubus") - #self.submodules.nubus2wishbone = ClockDomainsRenamer("nubus")(NuBus2Wishbone(nubus=self.nubus,wb=self.wishbone_master_nubus)) - nubus_writemaster_sys = wishbone.Interface(data_width=self.bus.data_width) - self.submodules.nubus2wishbone = NuBus2WishboneFIFO(platform=self.platform,nubus=self.nubus,wb_read=self.wishbone_master_nubus,wb_write=nubus_writemaster_sys) - self.bus.add_master(name="NuBusBridgeToWishboneWrite", master=nubus_writemaster_sys) - wishbone_slave_nubus = wishbone.Interface(data_width=self.bus.data_width) - self.submodules.wishbone2nubus = ClockDomainsRenamer("nubus")(Wishbone2NuBus(nubus=self.nubus,wb=wishbone_slave_nubus)) - self.submodules.wishbone_slave_sys = WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_slave_nubus, cd_master="sys", cd_slave="nubus") - self.bus.add_slave("DMA", self.wishbone_slave_sys, SoCRegion(origin=self.mem_map.get("master", None), size=0x40000000, cached=False)) - + xibus=0 + if (xibus): + wishbone_master_sys = wishbone.Interface(data_width=self.bus.data_width) + self.submodules.wishbone_master_nubus = WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_master_sys, cd_master="nubus", cd_slave="sys") + self.bus.add_master(name="NuBusBridgeToWishbone", master=wishbone_master_sys) + self.submodules.nubus = nubus.NuBus(platform=platform, cd_nubus="nubus") + #self.submodules.nubus2wishbone = ClockDomainsRenamer("nubus")(NuBus2Wishbone(nubus=self.nubus,wb=self.wishbone_master_nubus)) + nubus_writemaster_sys = wishbone.Interface(data_width=self.bus.data_width) + self.submodules.nubus2wishbone = NuBus2WishboneFIFO(platform=self.platform,nubus=self.nubus,wb_read=self.wishbone_master_nubus,wb_write=nubus_writemaster_sys) + self.bus.add_master(name="NuBusBridgeToWishboneWrite", master=nubus_writemaster_sys) + + wishbone_slave_nubus = wishbone.Interface(data_width=self.bus.data_width) + self.submodules.wishbone2nubus = ClockDomainsRenamer("nubus")(Wishbone2NuBus(nubus=self.nubus,wb=wishbone_slave_nubus)) + self.submodules.wishbone_slave_sys = WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_slave_nubus, cd_master="sys", cd_slave="nubus") + self.bus.add_slave("DMA", self.wishbone_slave_sys, SoCRegion(origin=self.mem_map.get("master", None), size=0x40000000, cached=False)) + else: + wishbone_master_sys = wishbone.Interface(data_width=self.bus.data_width) + self.submodules.wishbone_master_nubus = WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_master_sys, cd_master="nubus", cd_slave="sys") + nubus_writemaster_sys = wishbone.Interface(data_width=self.bus.data_width) + wishbone_slave_nubus = wishbone.Interface(data_width=self.bus.data_width) + self.submodules.wishbone_slave_sys = WishboneDomainCrossingMaster(platform=self.platform, slave=wishbone_slave_nubus, cd_master="sys", cd_slave="nubus") + self.submodules.nubus = nubus_full.NuBus(platform=platform, + wb_read=self.wishbone_master_nubus, + wb_write=nubus_writemaster_sys, + wb_dma=wishbone_slave_nubus, + cd_nubus="nubus") + self.bus.add_master(name="NuBusBridgeToWishbone", master=wishbone_master_sys) + self.bus.add_slave("DMA", self.wishbone_slave_sys, SoCRegion(origin=self.mem_map.get("master", None), size=0x40000000, cached=False)) + self.bus.add_master(name="NuBusBridgeToWishboneWrite", master=nubus_writemaster_sys) + if (goblin): if (not hdmi): self.submodules.videophy = VideoVGAPHY(platform.request("vga"), clock_domain="vga") @@ -387,10 +355,11 @@ class NuBusFPGA(SoCCore): self.add_ram("goblin_accel_ram", origin=self.mem_map["goblin_accel_ram"], size=2**12, mode="rw") # for testing - from nubus_master_tst import PingMaster - self.submodules.pingmaster = PingMaster(platform=self.platform) - self.bus.add_slave("pingmaster_slv", self.pingmaster.bus_slv, SoCRegion(origin=self.mem_map.get("pingmaster", None), size=0x010, cached=False)) - self.bus.add_master(name="pingmaster_mst", master=self.pingmaster.bus_mst) + if (True): + from nubus_master_tst import PingMaster + self.submodules.pingmaster = PingMaster(nubus=self.nubus, platform=self.platform) + self.bus.add_slave("pingmaster_slv", self.pingmaster.bus_slv, SoCRegion(origin=self.mem_map.get("pingmaster", None), size=0x010, cached=False)) + self.bus.add_master(name="pingmaster_mst", master=self.pingmaster.bus_mst) def main(): parser = argparse.ArgumentParser(description="SbusFPGA") diff --git a/nubus-to-ztex-gateware/slave_tb.sv b/nubus-to-ztex-gateware/slave_tb.sv index 6b374ca..bb7b18b 100644 --- a/nubus-to-ztex-gateware/slave_tb.sv +++ b/nubus-to-ztex-gateware/slave_tb.sv @@ -223,6 +223,7 @@ module nubus_slave_tb (); reg [31:0] tst_addrn; reg [31:0] tst_wdatan; reg [31:0] tst_rdatan; + reg tst_rqstn; reg mastermode_start; reg mastermode_tmack; @@ -231,6 +232,7 @@ module nubus_slave_tb (); assign nub_clk2xn = tst_clk2xn; assign bd_clk48 = tst_clk48; assign nub_resetn = tst_resetn; + assign nub_rqstn = tst_rqstn; // Drive NuBus signals assign nub_startn = mastermode_start ? 'bZ: tst_startn; assign nub_tm0n = (tst_startn & ~mastermode_tmack) ? 'bZ : tst_tmn[0]; @@ -257,6 +259,7 @@ module nubus_slave_tb (); tst_clkn <= 1; tst_resetn <= 0; + tst_rqstn <= 'bz; tst_addrn <= 'hFFFFFFFF; tst_wdatan <= 'hFFFFFFFF; tst_rdatan <= 'hFFFFFFFF; @@ -327,6 +330,7 @@ module nubus_slave_tb (); write_word(TMADN_WR_WORD, PING_ADDR+0, 'h00C0FFEE); read_word (TMADN_RD_WORD, PING_ADDR+0); write_word(TMADN_WR_WORD, PING_ADDR+4, 'h00096240); + //read_word (TMADN_RD_WORD, ROM_ADDR+0); mastermode_start <= 1; mastermode_tmack <= 0; @@ -351,7 +355,6 @@ module nubus_slave_tb (); @ (posedge nub_clkn); mastermode_start <= 0; mastermode_tmack <= 0; - #2000;