"""BBATop — top-level elaboratable for the GC BBA FPGA replacement.

Clock domains
-------------
  capture  : 54 MHz, from 12 MHz crystal via SB_PLL40_PAD (DIVR=0 DIVF=71 DIVQ=4)
  exi/sync : 24 MHz, from the iCE40UP5K internal SB_HFOSC (÷2, CLKHF_DIV=0b01)

Submodule instantiation and signal wiring
-----------------------------------------
See CLAUDE.md "Module Breakdown" and "CDC Signal Inventory" for the full list.
"""

from amaranth import *

from exi_bba.exi_capture import ExiCapture
from exi_bba.bba_register_file import BBARegisterFile
from exi_bba.spram_arbiter import SPRAMArbiter
from exi_bba.rx_frame_assembler import RXFrameAssembler
from exi_bba.tx_frame_drain import TXFrameDrain
from exi_bba.w5500_spi_master import W5500SPIMaster
from exi_bba.w5100_parallel_master import W5100ParallelMaster
from exi_bba.status_panel import StatusPanel
from exi_bba.uart_console import UARTConsole
from amaranth.lib.cdc import FFSynchronizer

__all__ = ["BBATop"]


class BBATop(Elaboratable):
    """Top-level module.  Wires all submodules and defines clock domains.

    Parameters
    ----------
    eth : str
        Ethernet back-end: ``"w5100"`` (indirect parallel bus, the default) or
        ``"w5500"`` (SPI).  Any other value raises ``ValueError``.
    reset_cycles : int
        MR-reset settle wait forwarded to the ethernet master.
    status_panel : bool
        When true, instantiate ``StatusPanel`` and expose ``panel_led`` /
        ``panel_btn``.
    uart_console : bool
        When true, instantiate ``UARTConsole`` and expose ``uart_tx`` /
        ``uart_rx``.

    External ports (exposed for platform or testbench connection)
    -------------------------------------------------------------
    EXI / GC interface (SPI Mode 3)
        exi_clk / exi_mosi / exi_cs_n : inputs from GC
        exi_miso                      : output to GC
        int_n                         : interrupt output (active low)

    W5500 SPI interface (SPI Mode 0) — only when ``eth="w5500"``
        w5500_clk / w5500_mosi / w5500_cs_n : outputs to W5500
        w5500_miso                          : input from W5500
        w5500_int_n                         : W5500 interrupt (input, active low)
        w5500_rst_n                         : W5500 hardware reset (output, active low)

    W5100 indirect parallel bus — only when ``eth="w5100"``
        w5100_addr                          : A[1:0] output
        w5100_data_o / w5100_data_oe        : D[7:0] output value and output enable
        w5100_data_i                        : D[7:0] input value
        w5100_cs_n / w5100_rd_n / w5100_wr_n : bus strobes (outputs)
        w5100_int_n                         : W5100 interrupt (input)
        w5100_rst_n                         : W5100 hardware reset (output)

    Optional bring-up peripherals
        panel_led (5 bits, output) / panel_btn (3 bits, input) : ``status_panel``
        uart_tx (output) / uart_rx (input)                     : ``uart_console``
    """

    # Back-end names accepted by the ``eth`` constructor argument.
    _ETH_BACKENDS = ("w5100", "w5500")

    def __init__(self, eth="w5100", reset_cycles=24000, status_panel=False,
                 uart_console=False):
        # Reject unknown back-ends up front: every ``eth == "w5500"`` test below
        # would otherwise fall through to the W5100 branch and silently build a
        # design with a different pin set than the caller asked for.
        if eth not in self._ETH_BACKENDS:
            raise ValueError(
                f"eth must be one of {self._ETH_BACKENDS}, got {eth!r}")

        # Ethernet back-end: "w5100" (indirect parallel bus, reaches the EXI
        # ceiling) or "w5500" (SPI, ~12 Mbit/s).  Both expose the identical
        # tx/rx/init/par interface, so only the physical pins differ.
        self._eth = eth
        # MR-reset settle wait passed to the ethernet master (~1 ms on hardware;
        # the testbench overrides with a small value for fast simulation).
        self._reset_cycles = reset_cycles
        # Optional bring-up status panel (drives onboard LEDs/button on the
        # iCEbreaker — see synth.py).  panel_led bit order matches StatusPanel.
        self._status_panel = status_panel
        # Optional UART debug console (8N1 115200, sync domain).
        # uart_tx → FT2232H Channel B pin 9; uart_rx ← pin 6.
        self._uart_console = uart_console

        # EXI (GC side)
        self.exi_clk  = Signal(init=1)
        self.exi_mosi = Signal()
        self.exi_cs_n = Signal(init=1)
        self.exi_miso = Signal()
        self.int_n    = Signal(init=1)

        if eth == "w5500":
            # W5500 SPI
            self.w5500_clk   = Signal()
            self.w5500_mosi  = Signal()
            self.w5500_miso  = Signal()
            self.w5500_cs_n  = Signal(init=1)
            self.w5500_int_n = Signal(init=1)
            self.w5500_rst_n = Signal(init=1)
        else:
            # W5100 indirect parallel bus.  data_o/data_oe/data_i are the FPGA
            # side of a bidirectional D[7:0] (wrapped in a tristate SB_IO at the
            # platform level); a board ties the upper address lines to 0 so only
            # A[1:0] are wired.
            self.w5100_addr    = Signal(2)
            self.w5100_data_o  = Signal(8)
            self.w5100_data_oe = Signal()
            self.w5100_data_i  = Signal(8)
            self.w5100_cs_n    = Signal(init=1)
            self.w5100_rd_n    = Signal(init=1)
            self.w5100_wr_n    = Signal(init=1)
            self.w5100_int_n   = Signal(init=1)
            self.w5100_rst_n   = Signal(init=1)

        if status_panel:
            self.panel_led = Signal(5)   # to onboard LEDs (see StatusPanel)
            self.panel_btn = Signal(3)   # from onboard button(s)

        if uart_console:
            self.uart_tx = Signal(init=1)   # FPGA → PC (FT2232H Channel B)
            self.uart_rx = Signal(init=1)   # PC → FPGA

    def elaborate(self, platform):
        """Build the design: clock domains, submodules and all inter-module wiring.

        When ``platform`` is ``None`` (simulation) no clock primitives are
        instantiated; the test harness is expected to supply the capture, exi
        and sync clocks itself.
        """
        m = Module()

        # ── Clock domain generation ───────────────────────────────────────
        # Three domains, two physical sources (1 PLL + 1 internal HFOSC):
        #   capture @ 54 MHz (PLL)   — SPI bit engine only; oversamples the
        #                              27 MHz EXI clock 2× (robust Mode-3).
        #   exi     @ 24 MHz (HFOSC) — register file / transaction FSM.
        #   sync    @ 24 MHz (HFOSC) — SPRAM, RX/TX engines, ethernet master.
        # exi and sync share the HFOSC net (frequency- and phase-matched); the
        # AsyncFIFOs between them are still valid CDC and keep the module
        # boundaries clean.  Only the tiny capture front-end needs the fast
        # clock — which is why 27 MHz-EXI / OG performance is reachable on the
        # iCE40UP5K even though the register-file logic tops out ~44 MHz.
        if platform is not None:
            # capture @ 54 MHz: icepll -i 12 -o 54 → DIVR=0 DIVF=71 DIVQ=4.
            # 54 MHz = 2× the 27 MHz EXI clock — the minimum oversampling that
            # cleanly implements SPI Mode 3.  The isolated SPI bit engine closes
            # ~91 MHz on this device; the byte-FIFO read path brings the
            # integrated capture domain to ~62 MHz, so 54 closes with margin.
            m.domains += ClockDomain("capture")
            platform.lookup(platform.default_clk).attrs["GLOBAL"] = False
            m.submodules.pll = Instance(
                "SB_PLL40_PAD",
                p_FEEDBACK_PATH = "SIMPLE",
                p_DIVR          = 0,
                p_DIVF          = 71,
                p_DIVQ          = 4,
                p_FILTER_RANGE  = 1,
                i_PACKAGEPIN    = platform.request("clk12", dir="-").io,
                i_RESETB        = Const(1, 1),
                i_BYPASS        = Const(0, 1),
                o_PLLOUTGLOBAL  = ClockSignal("capture"),
            )

            # exi & sync @ 24 MHz: one SB_HFOSC (÷2) drives both slow domains.
            # The bulky register-file / SPRAM / W5500 logic is routing-bound at
            # ~33–44 MHz on the UP5K; 24 MHz closes with large margin.  The byte
            # rate (27 MHz EXI ÷ 8 ≈ 3.4 MHz) leaves ~7 slow cycles per byte.
            m.domains += ClockDomain("exi")
            m.domains += ClockDomain("sync")
            m.submodules.hfosc = Instance(
                "SB_HFOSC",
                p_CLKHF_DIV = "0b01",    # 48 ÷ 2 → 24 MHz
                i_CLKHFEN   = Const(1, 1),
                i_CLKHFPU   = Const(1, 1),
                o_CLKHF     = ClockSignal("exi"),
            )
            m.d.comb += ClockSignal("sync").eq(ClockSignal("exi"))
        # (simulation: test harness provides capture/exi/sync clocks via add_clock)

        # ── Submodules ────────────────────────────────────────────────────
        cap   = ExiCapture()     # SPI bit engine (capture) + byte FIFOs
        reg   = BBARegisterFile()
        arb   = SPRAMArbiter()
        asm   = RXFrameAssembler()
        drain = TXFrameDrain()
        eth   = (W5500SPIMaster(reset_cycles=self._reset_cycles)
                 if self._eth == "w5500"
                 else W5100ParallelMaster(reset_cycles=self._reset_cycles))

        m.submodules.cap   = cap
        m.submodules.reg   = reg
        m.submodules.arb   = arb
        m.submodules.asm   = asm
        m.submodules.drain = drain
        m.submodules.eth   = eth

        # ── External pin connections ──────────────────────────────────────
        m.d.comb += [
            # EXI inputs (to capture-domain front-end)
            cap.spi_clk .eq(self.exi_clk),
            cap.spi_mosi.eq(self.exi_mosi),
            cap.spi_cs_n.eq(self.exi_cs_n),
            # EXI outputs
            self.exi_miso.eq(cap.spi_miso),
            self.int_n   .eq(reg.exi_int_n),
        ]

        # Ethernet back-end physical pins
        if self._eth == "w5500":
            m.d.comb += [
                self.w5500_clk  .eq(eth.spi_clk),
                self.w5500_mosi .eq(eth.spi_mosi),
                self.w5500_cs_n .eq(eth.spi_cs_n),
                eth.spi_miso    .eq(self.w5500_miso),
                eth.w5500_int_n .eq(self.w5500_int_n),
                self.w5500_rst_n.eq(eth.w5500_rst_n),
            ]
        else:
            m.d.comb += [
                self.w5100_addr   .eq(eth.bus_addr),
                self.w5100_data_o .eq(eth.bus_data_o),
                self.w5100_data_oe.eq(eth.bus_data_oe),
                eth.bus_data_i    .eq(self.w5100_data_i),
                self.w5100_cs_n   .eq(eth.cs_n),
                self.w5100_rd_n   .eq(eth.rd_n),
                self.w5100_wr_n   .eq(eth.wr_n),
                eth.w5100_int_n   .eq(self.w5100_int_n),
                self.w5100_rst_n  .eq(eth.w5100_rst_n),
            ]

        # ── ExiCapture byte stream ↔ BBARegisterFile (exi domain) ────────
        m.d.comb += [
            reg.rx_data   .eq(cap.rx_data),
            reg.rx_rdy    .eq(cap.rx_rdy),
            cap.rx_en     .eq(reg.rx_en),
            cap.tx_data   .eq(reg.tx_data),
            cap.tx_en     .eq(reg.tx_en),
            reg.tx_rdy    .eq(cap.tx_rdy),
            reg.cs_active .eq(cap.cs_active),   # transaction-active (for DMA reads)
        ]

        # ── BBARegisterFile ↔ SPRAMArbiter (sync domain FIFO sides) ──────
        # SPRAM request: reg exi→sync FIFO read side → arb
        m.d.comb += [
            arb.exi_req_addr  .eq(reg.spram_req_r_data),
            arb.exi_req_valid .eq(reg.spram_req_r_rdy),
            reg.spram_req_r_en.eq(arb.exi_req_ready),
        ]
        # SPRAM response: arb result → reg sync→exi FIFO write side
        m.d.comb += [
            reg.spram_rsp_w_data.eq(arb.exi_rsp_data),
            reg.spram_rsp_w_en  .eq(arb.exi_rsp_valid),
            # arb does not need w_rdy feedback (spram_rsp FIFO is deeper than latency)
        ]

        # ── BBARegisterFile ↔ TXFrameDrain (sync domain FIFO sides) ──────
        m.d.comb += [
            drain.tx_bytes_r_data.eq(reg.tx_bytes_r_data),
            drain.tx_bytes_r_rdy .eq(reg.tx_bytes_r_rdy),
            reg.tx_bytes_r_en    .eq(drain.tx_bytes_r_en),
            drain.tx_ctrl_r_data .eq(reg.tx_ctrl_r_data),
            drain.tx_ctrl_r_rdy  .eq(reg.tx_ctrl_r_rdy),
            reg.tx_ctrl_r_en     .eq(drain.tx_ctrl_r_en),
        ]

        # ── TXFrameDrain ↔ ethernet master (sync domain) ──────────────────
        m.d.comb += [
            eth.tx_data   .eq(drain.tx_data),
            eth.tx_valid  .eq(drain.tx_valid),
            drain.tx_ready.eq(eth.tx_ready),
            eth.tx_sof    .eq(drain.tx_sof),
            eth.tx_eof    .eq(drain.tx_eof),
        ]

        # ── ethernet master → RXFrameAssembler (sync domain) ─────────────
        m.d.comb += [
            asm.rx_data .eq(eth.rx_data),
            asm.rx_valid.eq(eth.rx_valid),
            eth.rx_ready.eq(asm.rx_ready),
            asm.rx_sof  .eq(eth.rx_sof),
            asm.rx_eof  .eq(eth.rx_eof),
        ]

        # ── RXFrameAssembler → SPRAMArbiter (ETH write, sync domain) ─────
        m.d.comb += [
            arb.eth_wr_addr .eq(asm.eth_wr_addr),
            arb.eth_wr_data .eq(asm.eth_wr_data),
            arb.eth_wr_valid.eq(asm.eth_wr_valid),
            asm.eth_wr_ready.eq(arb.eth_wr_ready),
        ]

        # ── RXFrameAssembler → BBARegisterFile (rx_wptr FIFO write side) ─
        m.d.comb += [
            reg.rx_wptr_w_data.eq(asm.rx_wptr_w_data),
            reg.rx_wptr_w_en  .eq(asm.rx_wptr_w_en),
            asm.rx_wptr_w_rdy .eq(reg.rx_wptr_w_rdy),
        ]

        # ── Pulse synchronizer connections ────────────────────────────────
        m.d.comb += [
            # RX irq: sync → exi  (RXFrameAssembler → reg → PS → exi domain)
            reg.rx_irq_i.eq(asm.rx_irq),
            # TX irq: sync → exi
            reg.tx_irq_i.eq(drain.tx_irq),
            # MAC address (PAR0–5) → SHAR.  exi and sync share the HFOSC net,
            # and par is quasi-static (sampled by the master at init_req).
            eth.par.eq(reg.par),
        ]

        # ── RX enabled gate (NCRA SR / start-receive bit) ─────────────────
        # The RX ring-buffer path is active only after the GC sets NCRA[3].
        m.d.comb += asm.rx_enabled.eq(reg.ncra_sr)

        # ── Optional bring-up peripherals (sync domain) ───────────────────
        # Build init_req as an OR of all reinit sources (NCRA pulse plus any
        # manual re-init from the status panel and/or the UART 'r' command).
        # "ready" is latched high by eth.init_done and cleared by any init_req.
        # It is computed only when at least one peripheral needs it.
        init_req = reg.ncra_rst_o        # base: GC-issued NCRA reset
        need_ready = self._status_panel or self._uart_console
        if need_ready:
            ready = Signal()

        if self._status_panel:
            panel = StatusPanel()
            m.submodules.panel = panel
            init_req = init_req | panel.reinit
            # cs_active lives in the exi domain; bring it to sync for the LED.
            cs_a_sync = Signal()
            m.submodules.panel_cs = FFSynchronizer(
                cap.cs_active, cs_a_sync, o_domain="sync")
            m.d.comb += [
                panel.cs_active.eq(cs_a_sync),
                panel.rx_pulse .eq(asm.rx_irq),
                panel.tx_pulse .eq(drain.tx_irq),
                panel.ready    .eq(ready),
                panel.btn      .eq(self.panel_btn),
                self.panel_led .eq(panel.led),
            ]

        if self._uart_console:
            console = UARTConsole()
            m.submodules.console = console
            init_req = init_req | console.reinit
            m.d.comb += [
                console.ncra_rst.eq(reg.ncra_rst_o),
                console.rx_pulse.eq(asm.rx_irq),
                console.tx_pulse.eq(drain.tx_irq),
                console.ready   .eq(ready),
                self.uart_tx    .eq(console.uart_tx),
                console.uart_rx .eq(self.uart_rx),
            ]

        if need_ready:
            # init_done takes priority over a simultaneous init_req.
            with m.If(eth.init_done):
                m.d.sync += ready.eq(1)
            with m.Elif(init_req):
                m.d.sync += ready.eq(0)

        m.d.comb += eth.init_req.eq(init_req)

        return m


# ── Integration testbench ─────────────────────────────────────────────────
# Drives real EXI Mode-3 transactions on the GC-facing pins and checks the
# response — exercising the full chain ExiCapture (capture domain) ↔ byte FIFOs
# ↔ BBARegisterFile (exi domain) ↔ sync modules, across all three clock domains.
if __name__ == "__main__":
    # Self-checking simulation of BBATop with the W5100 back-end and the status
    # panel enabled.  Failures are collected in `errors`; the process exits with
    # status 1 if any check failed.
    import sys
    from amaranth.sim import Simulator, Period

    dut = BBATop(eth="w5100",
                 reset_cycles=20,       # small reset wait for sim
                 status_panel=True)     # also exercise the panel wiring
    errors = []

    HALF = 8   # capture ticks per SPI half-period (well-oversampled)

    async def spi_byte(ctx, mosi_val):
        """Drive one EXI Mode-3 byte; return the assembled MISO byte."""
        miso = 0
        for bit in range(7, -1, -1):          # MSB first
            ctx.set(dut.exi_mosi, (mosi_val >> bit) & 1)
            ctx.set(dut.exi_clk, 0)           # falling: slave samples MOSI
            await ctx.tick("capture").repeat(HALF)
            miso = (miso << 1) | ctx.get(dut.exi_miso)
            ctx.set(dut.exi_clk, 1)           # rising
            await ctx.tick("capture").repeat(HALF)
        return miso

    async def exi_read(ctx, addr, length):
        """EXI immediate read: 2-byte header, clock-idle gap, then `length` bytes."""
        hdr0 = (addr >> 6) & 0x7F
        # The header length field is only 2 bits ([1:0]); mask it so a long
        # (DMA) read doesn't overflow length-1 into the addr[5:0] bits.  For
        # SPRAM reads the field is ignored anyway — the stream runs until CS.
        hdr1 = ((addr & 0x3F) << 2) | ((length - 1) & 0x3)
        ctx.set(dut.exi_cs_n, 0)
        ctx.set(dut.exi_clk, 1)
        await ctx.tick("capture").repeat(HALF)
        await spi_byte(ctx, hdr0)
        await spi_byte(ctx, hdr1)
        # EXI_Imm clock-idle gap: the core decodes the header and prefetches
        # responses into the tx FIFO before the GC clocks the data phase.
        await ctx.tick("capture").repeat(HALF * 12)
        result = [await spi_byte(ctx, 0x00) for _ in range(length)]
        ctx.set(dut.exi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)
        return result

    async def exi_write(ctx, addr, data):
        """EXI immediate write: 2-byte header then the data bytes.

        Raises ValueError if `data` is empty (the header cannot encode a
        zero-length transfer).
        """
        if not data:
            raise ValueError("exi_write needs at least one data byte")
        hdr0 = 0x80 | ((addr >> 6) & 0x7F)
        # Same 2-bit length field as exi_read: mask it so a payload longer than
        # 4 bytes cannot spill into the addr[5:0] bits of the header.
        hdr1 = ((addr & 0x3F) << 2) | ((len(data) - 1) & 0x3)
        ctx.set(dut.exi_cs_n, 0)
        ctx.set(dut.exi_clk, 1)
        await ctx.tick("capture").repeat(HALF)
        await spi_byte(ctx, hdr0)
        await spi_byte(ctx, hdr1)
        for b in data:
            await spi_byte(ctx, b)
        ctx.set(dut.exi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)

    # ── W5100 indirect-bus slave model (drives w5100_data_i) ─────────────
    # Pre-loads a known MACRAW packet in the RX buffer so we can verify the full
    # ethernet→SPRAM→GC path.  Same protocol as the W5100ParallelMaster bench.
    RX_FRAME = [0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03, 0x04]
    _W_RX_BASE   = 0x6000
    _W_S0_CR     = 0x0401
    _W_S0_RX_RSR = 0x0426
    _W_S0_RX_RD  = 0x0428
    _W_CR_RECV   = 0x40
    _A_MR, _A_AR0, _A_AR1, _A_DR = 0b00, 0b01, 0b10, 0b11

    def w5100_preload():
        """Return the model's initial memory: one MACRAW packet plus RSR/RD registers."""
        plen = len(RX_FRAME) + 2          # MACRAW length includes its header
        mem = {}
        for i, b in enumerate([(plen >> 8) & 0xFF, plen & 0xFF] + RX_FRAME):
            mem[_W_RX_BASE + i] = b
        mem[_W_S0_RX_RSR], mem[_W_S0_RX_RSR + 1] = (plen >> 8) & 0xFF, plen & 0xFF
        mem[_W_S0_RX_RD], mem[_W_S0_RX_RD + 1] = 0, 0
        return mem

    w5100_mem = w5100_preload()

    async def w5100_model(ctx):
        """Behavioural W5100 indirect-bus slave, sampled on every sync tick."""
        idm_ar = 0                 # indirect address register (AR0:AR1)
        mr = 0                     # mode register; bit 1 is used as auto-increment
        prev_rd = prev_wr = 1      # previous strobe levels, for edge detection
        async for vals in ctx.tick("sync").sample(
                dut.w5100_cs_n, dut.w5100_rd_n, dut.w5100_wr_n,
                dut.w5100_addr, dut.w5100_data_o):
            # The sampled signal values are the last five items of the tuple.
            cs, rd, wr, a, do = vals[-5:]
            ai = (mr >> 1) & 1
            if cs == 0 and rd == 0:                       # drive read data
                if a == _A_MR:
                    val = mr
                elif a == _A_AR0:
                    val = (idm_ar >> 8) & 0xFF
                elif a == _A_AR1:
                    val = idm_ar & 0xFF
                else:
                    val = w5100_mem.get(idm_ar, 0)
                ctx.set(dut.w5100_data_i, val)
            if cs == 0 and prev_wr == 0 and wr == 1:      # latch write on /WR rising
                if a == _A_MR:
                    mr = do
                elif a == _A_AR0:
                    idm_ar = (idm_ar & 0x00FF) | (do << 8)
                elif a == _A_AR1:
                    idm_ar = (idm_ar & 0xFF00) | do
                else:
                    w5100_mem[idm_ar] = do
                    # A RECV command on socket 0 consumes the pending packet.
                    if idm_ar == _W_S0_CR and do == _W_CR_RECV:
                        w5100_mem[_W_S0_RX_RSR] = 0
                        w5100_mem[_W_S0_RX_RSR + 1] = 0
                    if ai:
                        idm_ar = (idm_ar + 1) & 0xFFFF
            # Data-register reads auto-increment on the /RD rising edge.
            if cs == 0 and prev_rd == 0 and rd == 1 and a == _A_DR and ai:
                idm_ar = (idm_ar + 1) & 0xFFFF
            prev_rd, prev_wr = rd, wr

    async def testbench(ctx):
        """Run checks T1–T7 against the DUT, appending any failure to `errors`."""
        ctx.set(dut.exi_clk, 1)
        ctx.set(dut.exi_cs_n, 1)
        ctx.set(dut.panel_btn, 0b111)         # all buttons released (active-low idle)
        await ctx.tick("capture").repeat(20)

        # T1: device ID — read 4 bytes from addr 0 → 0x04 0x02 0x02 0x00
        dev = await exi_read(ctx, 0x0000, 4)
        print(f"T1 device ID: {[f'0x{b:02X}' for b in dev]}")
        if dev != [0x04, 0x02, 0x02, 0x00]:
            errors.append(f"T1 device ID: got {dev}")
        await ctx.tick("capture").repeat(HALF)

        # T2: write PAR0–3, read them back through the full chain
        await exi_write(ctx, 0x20, [0xDE, 0xAD, 0xBE, 0xEF])
        await ctx.tick("capture").repeat(HALF * 4)
        par = await exi_read(ctx, 0x20, 4)
        print(f"T2 PAR0-3 readback: {[f'0x{b:02X}' for b in par]}")
        if par != [0xDE, 0xAD, 0xBE, 0xEF]:
            errors.append(f"T2 PAR readback: got {par}")
        await ctx.tick("capture").repeat(HALF)

        # T3: NWAYS must read back the hardcoded 0x17 (link-up sentinel)
        nways = await exi_read(ctx, 0x31, 1)
        print(f"T3 NWAYS: 0x{nways[0]:02X} (want 0x17)")
        if nways != [0x17]:
            errors.append(f"T3 NWAYS: got {nways}")
        await ctx.tick("capture").repeat(HALF)

        # T4: DMA-style SPRAM read — clock 8 data bytes (past the 4-byte header
        # limit) within one CS.  Exercises the integrated streaming path:
        # ExiCapture(cs_active) → register file SPRAM_STREAM → SPRAMArbiter →
        # real SPRAM → MISO, plus the SPRAM_END cleanup.  SPRAM is uninitialised
        # here, so we check the stream completes (8 bytes, no underrun/hang)
        # rather than specific data.
        dma = await exi_read(ctx, 0x0100, 8)
        print(f"T4 DMA read (8B from 0x100): {[f'0x{b:02X}' for b in dma]}")
        if len(dma) != 8:
            errors.append(f"T4 DMA read length: got {len(dma)}")
        await ctx.tick("capture").repeat(HALF)

        # T5: a register read after the streaming read confirms the FSM cleaned
        # up (SPRAM_END → HEADER0) and the device is responsive again.
        nways2 = await exi_read(ctx, 0x31, 1)
        print(f"T5 NWAYS after DMA: 0x{nways2[0]:02X} (want 0x17)")
        if nways2 != [0x17]:
            errors.append(f"T5 NWAYS after DMA read: got {nways2}")
        await ctx.tick("capture").repeat(HALF)

        # ── T6: FULL ETHERNET→SPRAM→GC LOOP ──────────────────────────────
        # A frame arrives from the network (W5100 model) → W5100 master reads it
        # → RXFrameAssembler writes it to the SPRAM ring → GC reads RWP then
        # DMA-reads the descriptor+frame back.  Exercises the entire RX path.
        # The W5100 needs its init sequence (which sets MR.AI / opens socket 0)
        # before multi-byte bus accesses work — trigger it via NCRA reset, as
        # the real GC driver does, and let it run before enabling RX.
        await exi_write(ctx, 0x00, [0x01])            # NCRA reset → init_req pulse
        await ctx.tick("capture").repeat(2000)        # let W5100 init run
        await exi_write(ctx, 0x00, [0x08])            # NCRA SR bit → enable RX
        await ctx.tick("capture").repeat(HALF * 2)
        ctx.set(dut.w5100_int_n, 0)                   # W5100: a packet was received
        await ctx.tick("capture").repeat(4000)        # let the W5100 RX + SPRAM write run
        ctx.set(dut.w5100_int_n, 1)
        await ctx.tick("capture").repeat(HALF * 2)

        rwp = await exi_read(ctx, 0x16, 1)            # RX write pointer (page)
        total_len = len(RX_FRAME) + 4
        got = await exi_read(ctx, 0x0100, total_len)  # descriptor + frame
        want = [0x00, 0x00, (total_len >> 8) & 0xFF, total_len & 0xFF] + RX_FRAME
        print(f"T6 RWP=0x{rwp[0]:02X} (want 0x02)")
        print(f"T6 SPRAM[0x100]: {[f'0x{b:02X}' for b in got]}")
        print(f"T6 expected : {[f'0x{b:02X}' for b in want]}")
        if rwp != [0x02]:
            errors.append(f"T6 RWP: got {rwp}, want [0x02]")
        if got != want:
            errors.append(f"T6 RX frame mismatch:\n got {got}\n want {want}")

        # T7: status-panel integration — after all the EXI traffic above, the
        # EXI-activity LED (panel led[1] = stretched cs_active) must be lit,
        # proving cap.cs_active → FFSync → StatusPanel → LED is wired end-to-end.
        leds = ctx.get(dut.panel_led)
        if not (leds >> 1) & 1:
            errors.append(f"T7 panel: EXI-activity LED not lit (led=0b{leds:05b})")
        print(f"T7 panel led=0b{leds:05b} (bit1=EXI activity, expect 1)")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=54), domain="capture")
    sim.add_clock(Period(MHz=24), domain="exi")
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    sim.add_process(w5100_model)
    sim.run()

    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
    else:
        print("\nAll BBATop integration tests passed.")