Added full design created with Claude

This commit is contained in:
Dennis Brentjes
2026-06-13 18:35:38 +02:00
parent 57b5b471b8
commit 8d0ab1d948
30 changed files with 7424 additions and 395 deletions
+16
View File
@@ -0,0 +1,16 @@
# Development image for the Amaranth HDL / iCEbreaker flow.
FROM python:3.12-slim-bookworm

# System packages: the iCE40 FPGA toolchain (yosys, nextpnr-ice40,
# fpga-icestorm), git, and Node.js/npm (needed for the npm install below).
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        fpga-icestorm \
        git \
        nextpnr-ice40 \
        nodejs \
        npm \
        yosys \
 && rm -rf /var/lib/apt/lists/*

# Claude Code CLI, installed globally through npm.
RUN npm install -g @anthropic-ai/claude-code

# Python dependencies come from requirements.txt in the build context.
WORKDIR /workspace
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
+32
View File
@@ -0,0 +1,32 @@
#Requires -RunAsAdministrator
<#
.SYNOPSIS
    Attaches the IceBreaker FPGA (FTDI FT2232H, VID 0403) to WSL2 via usbipd-win.

.DESCRIPTION
    Run this on the Windows host before opening the devcontainer.

    The script locates exactly one connected USB device whose VID is 0403
    (FTDI), shares it with `usbipd bind` when usbipd reports it as
    "Not shared", and then attaches it to WSL2. It fails with a non-zero exit
    code when usbipd is missing, when no or several FTDI devices are present,
    or when a usbipd command itself fails.
#>
$ErrorActionPreference = 'Stop'

if (-not (Get-Command usbipd -ErrorAction SilentlyContinue)) {
    Write-Error "usbipd not found. Install it from: https://github.com/dorssel/usbipd-win/releases"
    exit 1
}

# Match only device rows of `usbipd list`: "<BUSID>  <VID>:<PID>  <name>  <state>".
# Anchoring on the BUSID and the VID column avoids false hits on a PID, a
# device name, or a persisted-device GUID that merely contains "0403".
$ftdiRow = '^\s*(?<busid>\d+-\d+)\s+0403:[0-9A-Fa-f]{4}\b'

# @(...) forces an array so a single match is not collapsed to a bare string.
$devices = @(usbipd list | Where-Object { $_ -match $ftdiRow })

if ($devices.Count -eq 0) {
    Write-Error "No FTDI device (VID 0403) found. Is the IceBreaker plugged in?"
    exit 1
}

if ($devices.Count -gt 1) {
    Write-Host "Multiple FTDI devices found:"
    $devices | ForEach-Object { Write-Host " $_" }
    Write-Error "Ambiguous. Unplug other FTDI devices or run 'usbipd attach --wsl --busid <BUSID>' manually."
    exit 1
}

# Extract BUSID (first column of the row, e.g. "3-1") via the named capture,
# which is immune to leading whitespace on the line.
$row   = $devices[0]
$busid = [regex]::Match($row, $ftdiRow).Groups['busid'].Value

# The last column of the row is the sharing state. Re-running the script with
# the device already attached is treated as success instead of an error.
if ($row -match 'Attached\s*$') {
    Write-Host "IceBreaker at bus ID $busid is already attached to WSL2."
    exit 0
}

# usbipd-win refuses to attach a device that has not been shared yet.
if ($row -match 'Not shared\s*$') {
    Write-Host "Sharing IceBreaker at bus ID $busid..."
    usbipd bind --busid $busid
    if ($LASTEXITCODE -ne 0) {
        Write-Error "usbipd bind failed for bus ID $busid (exit code $LASTEXITCODE)."
        exit 1
    }
}

Write-Host "Attaching IceBreaker at bus ID $busid to WSL2..."
usbipd attach --wsl --busid $busid

# Native commands do not raise PowerShell errors; check the exit code so a
# failed attach is not reported as success.
if ($LASTEXITCODE -ne 0) {
    Write-Error "usbipd attach failed for bus ID $busid (exit code $LASTEXITCODE)."
    exit 1
}

Write-Host "Done. You can now open the devcontainer and use iceprog."
+29
View File
@@ -0,0 +1,29 @@
// Dev container definition for the Amaranth HDL / IceBreaker workspace.
{
"name": "Amaranth HDL - IceBreaker",
// The image is built from the Dockerfile named here. The build context is
// ".." — presumably the repository root, so that the Dockerfile's
// `COPY requirements.txt .` can resolve (confirm against the repo layout).
"build": {
"dockerfile": "Dockerfile",
"context": ".."
},
// USB flashing (iceprog) requires the IceBreaker to be forwarded to WSL2 first.
// On Windows: install usbipd-win (https://github.com/dorssel/usbipd-win/releases),
// then run (as Administrator) before opening this devcontainer:
// .devcontainer/attach-icebreaker.ps1
// The container is started with --privileged for that USB access.
"runArgs": ["--privileged"],
// The local workspace folder is bind-mounted at /workspace, which is also the
// folder opened inside the container.
"workspaceFolder": "/workspace",
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
// Bind-mounts the host user's .claude directory into /root/.claude.
// NOTE(review): USERPROFILE is a Windows environment variable; on a
// Linux/macOS host it is normally unset, so this mount source would not
// resolve to a home directory — confirm if non-Windows hosts must be supported.
"mounts": [
"source=${localEnv:USERPROFILE}/.claude,target=/root/.claude,type=bind,consistency=cached"
],
// VS Code extensions and settings applied inside the container.
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-python.pylance",
"anthropic.claude-code"
],
"settings": {
"python.defaultInterpreterPath": "/usr/local/bin/python"
}
}
}
}
+16
View File
@@ -0,0 +1,16 @@
# Generated FPGA build artifacts (regenerate with: python -m exi_bba.synth)
build/
# Simulation waveforms (regenerate by running the testbenches)
*.vcd
# Python
__pycache__/
*.pyc
*.pyo
.venv/
venv/
# Editor / OS cruft
.DS_Store
*.swp
+493
View File
@@ -0,0 +1,493 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project: GC BBA FPGA Replacement
Replace the GameCube Broadband Adapter (DOL-015 / MX98730EC) with an iCEbreaker
FPGA (Lattice iCE40UP5K) written in Amaranth HDL. The FPGA emulates the BBA
register interface over the GameCube EXI bus and bridges to a WIZnet ethernet
chip for real 100BASE-TX ethernet — default **W5100** (indirect parallel bus,
reaches the EXI throughput ceiling) or **W5500** (SPI Pmod, simpler wiring but
~12 Mbit/s). GC software (Swiss homebrew) sees an identical BBA. See "W5100 vs
W5500 ethernet back-end".
---
## Development Environment
**Preferred:** Use the devcontainer (`.devcontainer/`) which includes Python 3.12,
`nextpnr-ice40`, and `fpga-icestorm` pre-installed.
**Windows host + WSL2 devcontainer — USB flashing setup:**
1. Install `usbipd-win` (https://github.com/dorssel/usbipd-win/releases)
2. Run `.devcontainer/attach-icebreaker.ps1` as Administrator before opening the devcontainer
3. The devcontainer runs `--privileged` to pass through the USB device
**Local venv (outside devcontainer):**
```bash
python -m venv .venv
source .venv/bin/activate # Windows: .venv\Scripts\activate
pip install -r requirements.txt
```
Yosys is bundled in `amaranth-yosys`; `nextpnr-ice40` and `iceprog` must be
installed separately (via apt on Linux, or via the devcontainer).
---
## Commands
**Build and flash the iCEbreaker (must run from workspace root):**
```bash
python rebbarb/rebbarb.py
```
Runs synthesis (yosys), place-and-route (nextpnr-ice40), and flashes via `iceprog`.
Set `ICEPROG=/path/to/iceprog` env var to override the binary location.
Note: `rebbarb/rebbarb.py` builds a 36 MHz LED blink demo. The BBA
implementation (`exi_bba/`) uses a split-domain clock: `capture` @ 54 MHz (PLL)
for the SPI bit engine, `exi`/`sync` @ 24 MHz (HFOSC) for everything else.
Synthesize/flash the real design with `python -m exi_bba.synth [--flash]`.
**Run a simulation:**
```bash
# New-API testbench style (preferred for new code):
python rebbarb/toggle_button.py # writes ToggleButton.vcd
python rebbarb/pulse_button.py # writes PulseButton.vcd
# Old-API process style (reference only, do not replicate in new code):
python examples/amaranth_cdc.py # CDC primitives demo
python examples/async_fifo.py # AsyncFIFO behaviour
python examples/icebreaker_fifo.py # iCEbreaker-specific FIFO (Verilog dump)
```
Open VCD output with `gtkwave`. Simulations are the primary testing mechanism —
there is no separate test runner.
**Verify PLL parameters:**
```bash
icepll -i 12 -o 54 # confirms DIVR=0 DIVF=71 DIVQ=4 → 54 MHz (capture domain)
```
(`exi`/`sync` come from the internal SB_HFOSC ÷2 = 24 MHz — no PLL.)
---
## Current Implementation State
The `exi_bba/` module tree is **fully implemented** with simulation testbenches.
All modules elaborate without errors and pass their unit tests. The full design
**synthesizes, places, routes, and meets timing** on the iCE40UP5K
(`python -m exi_bba.synth`): `capture` closes ~70 MHz (target 54) and `exi`/
`sync` close ~36 MHz (target 24) — both PASS.
### `exi_bba/` module status
| Module | File | Tests pass |
|---|---|---|
| `BBATop` | `exi_bba/bba_top.py` | ✅ EXI integration + full W5100→SPRAM→GC RX loop; synth PASS |
| `ExiCapture` | `exi_bba/exi_capture.py` | ✅ rx/tx byte-stream + over-push/flush |
| `SPIMode3Slave` | `exi_bba/spi_mode3_slave.py` | ✅ 4 tests (live-drive TX) |
| `BBARegisterFile` | `exi_bba/bba_register_file.py` | ✅ 7 tests (proactive push + DMA stream) |
| `SPRAMArbiter` | `exi_bba/spram_arbiter.py` | ✅ 3 tests |
| `RXFrameAssembler` | `exi_bba/rx_frame_assembler.py` | ✅ 3 tests |
| `TXFrameDrain` | `exi_bba/tx_frame_drain.py` | ✅ 2 tests |
| `W5100ParallelMaster` | `exi_bba/w5100_parallel_master.py` | ✅ 5 tests (init/TX/RX vs bus model, incl. ring wrap) — **default eth back-end** |
| `W5500SPIMaster` | `exi_bba/w5500_spi_master.py` | ✅ init/TX/RX vs SPI-slave model (alt back-end) |
| `StatusPanel` | `exi_bba/status_panel.py` | ✅ 6 tests (heartbeat, stretched activity LEDs, debounced buttons, freeze) |
| `EEPROMModel` | `exi_bba/eeprom_model.py` | ✅ 4 tests |
**Bring-up status panel (optional):** `BBATop(status_panel=True)` adds a
`StatusPanel` driving onboard iCEbreaker LEDs + button (dedicated pins, so it
coexists with EXI + W5100). `synth.py` enables it: **LEDG=heartbeat**,
**LEDR=EXI activity** (the GC is talking), **BTN_N=manual re-init**. The full
EXI + W5100 + panel build synthesizes and meets timing (slow ~35≥24, capture
~64≥54, 44% LC). Panel LEDs 3–5 (rx/tx/ready) exist in the module but aren't
mapped on the iCEbreaker (only 2 discrete LEDs); the onboard RGB or a custom
PCB can expose them.
**Ethernet back-end is selectable:** `BBATop(eth="w5100")` (default — indirect
parallel bus, reaches the ~27 Mbit/s EXI ceiling) or `BBATop(eth="w5500")` (SPI,
~12 Mbit/s). Both masters expose the identical tx/rx/init/par streaming
interface; only the physical pins differ. See "W5100 vs W5500" below.
### Run all module testbenches (from workspace root)
```bash
python -m exi_bba.spi_mode3_slave
python -m exi_bba.exi_capture
python -m exi_bba.bba_register_file
python -m exi_bba.spram_arbiter
python -m exi_bba.rx_frame_assembler
python -m exi_bba.tx_frame_drain
python -m exi_bba.w5100_parallel_master # 5 tests: init, TX(+wrap), RX(+wrap)
python -m exi_bba.w5500_spi_master
python -m exi_bba.status_panel # 6 tests: heartbeat/activity/buttons
python -m exi_bba.eeprom_model
python -m exi_bba.bba_top # end-to-end EXI integration test (W5100 RX loop)
```
### Pending work
- **Synthesis/timing**: ✅ done — `python -m exi_bba.synth` synthesizes, P&Rs,
and meets timing on both clock domains (capture ~68≥54, slow ~40≥24).
- **W5500 init/TX/RX**: ✅ done — `W5500SPIMaster` has a real Mode-0 byte engine,
a generic register-transaction engine (header + wbuf/stream payload), the full
init sequence (MR reset, SHAR, S0_MR MACRAW, S0_CR OPEN, S0_IMR), MACRAW TX
(read TX_WR → stream frame to TX buffer → advance TX_WR → SEND) and MACRAW RX
(RSR → RD → 2-byte length → stream frame out → advance RD → RECV). All verified
on the wire by a responding W5500 SPI-slave model in the testbench.
- **PAR0–5 → W5500 SHAR**: ✅ done — `reg.par` wired to `w5500.par` in `BBATop`
(PAR0 packed in the low byte so it is the first SHAR octet).
- **NCRA SR bit**: ✅ done — `BBARegisterFile.ncra_sr` (= NCRA[3]) gates
`asm.rx_enabled` in `BBATop` (was hard-wired to 1).
- **W5500 SPI throughput**: SCK = sync÷2 = 12 MHz (~12 Mbit/s) — exceeds
real-world GC BBA TCP throughput (~6–10 Mbit/s) but is below the 27 Mbit/s raw
EXI ceiling. Pushing past 12 Mbit/s was investigated and found NOT achievable
on this UP5K (the W5500-operating logic is distributed ~40 MHz, not just the
bit-bang) — see the "Full-rate W5500 SPI" item below.
`W5500SPIMaster(clk_div=N)` divides SCK further if signal integrity needs it.
- **EXI DMA bulk reads**: ✅ done — SPRAM-region reads (addr ≥ 0x100) now STREAM
until CS deasserts instead of stopping at the header's 2-bit length, so they
serve both ≤4-byte immediate reads (Swiss) AND arbitrary-length DMA reads
(other GC software, and a future Swiss path for loading ROMs from a network
file store). Implementation:
- `SPIMode3Slave.cs_active` (synchronised CS level) → `ExiCapture` crosses it
to the exi domain (FFSynchronizer) → `BBARegisterFile.cs_active`.
- `BBARegisterFile` SPRAM_STREAM state: auto-increments the SPRAM address,
prefetches up to SP_LIMIT=4 reads in flight, pushes responses to tx_fifo;
SPRAM_END drains the in-flight pipeline + rx dummies on CS-rise.
- `ExiCapture` flushes tx_fifo on CS-fall to clear prefetch over-push so a
truncated DMA read can't leak stale bytes into the next transaction.
Tested: register-file streaming read (SPRAM model, 12 bytes), ExiCapture
over-push/flush, AND the full BBATop loop — a W5500 model delivers a frame →
W5500 master RX → RXFrameAssembler writes the SPRAM ring → GC reads RWP then
DMA-reads the descriptor+frame back (verified byte-for-byte).
Note: a DMA read header must keep length-1 within the 2-bit field; the GC
driver sets it ≤3 and clocks the real length via CS (the design streams
until CS regardless). (EXI DMA *writes* are not implemented; the GC's
DMA-write engine has a 1-bit-shift bug and Swiss avoids them — see
design-doc §"EXI DMA bug".)
- **S0_IR interrupt clear after RX**: ✅ done — `W5500SPIMaster` RX_CLR_IR state
writes Sn_IR[2]=1 after RECV so `INT_N` deasserts (else the FSM would re-enter
RX_CHECK forever on real hardware).
- **Full-rate W5500 SPI (27 Mbit/s) — INVESTIGATED, NOT achievable on UP5K**:
the W5500 SCK is sync÷2 = 12 MHz. Raising it needs the SPI engine on a ≥54 MHz
clock, but a standalone synth of `W5500SPIMaster` in the capture domain closes
only **40 MHz** — and the slack histogram shows the failure is *distributed*
(~140 endpoints fail 54, incl. the `wbuf`/header mux feeding the shift
register), NOT a single cuttable path. So the bottleneck is the **logic that
operates the SPI device** (transaction FSM, byte sourcing), not the bit-bang.
Consequences:
- The "split the bit engine to capture + per-byte CDC handshake" idea nets
only ~14 Mbit/s — the CDC round-trip ≈ the SPI byte time — not worth it.
- A capture-domain "streaming executor" would still contain that distributed
~40 MHz logic, so it wouldn't close 54 either.
- Hardware `SB_SPI` wouldn't help (it only offloads the bit-bang, which was
never the bottleneck) and is unsimulatable.
- There is no usable clock between 24 (HFOSC) and 54 (the one PLL, needed at
54 for the EXI front-end); PLL÷2 = 27 → SCK 13.5 MHz, a ~12% gain, not
worth the fabric divider.
Net: 12 Mbit/s is the practical W5500 ceiling on this part. It exceeds
real-world GC BBA TCP throughput and is fine for chunked ROM streaming.
Reaching 27 Mbit/s would need a faster FPGA or a much shallower W5500-operating
redesign (uncertain) — **OR a parallel-bus ethernet chip (see W5100 below)**,
which is the implemented solution for the ROM-streaming throughput target.
## W5100 vs W5500 ethernet back-end
The throughput insight: SPI serialises 8 bits/byte, so the W5500 byte rate is
(operating-logic clock)/16 — and that logic caps ~40 MHz on this UP5K → ~12
Mbit/s. A **parallel** bus moves a whole byte per access, so the *same* ~24 MHz
`sync` logic clears the 27 Mbit/s EXI ceiling (the real hard limit — the GC EXI
bus tops out there). So `W5100ParallelMaster` is the throughput path and is now
the `BBATop` default.
- **Interface:** W5100 **indirect parallel bus** (IDM). Only A[1:0] are wired
(board ties A[14:2]=0 so a power-up direct access at A=00 still hits MR):
`00`=MR, `01`=IDM_AR0(hi), `10`=IDM_AR1(lo), `11`=IDM_DR. A register/buffer
access = write IDM_AR (the 16-bit address) then read/write IDM_DR. With MR.AI
set, IDM_DR auto-increments → a multi-byte block is one address-set + a burst.
- **Bus engine:** drives A + D with `/CS` and `/RD`|`/WR` asserted for
`strobe_cycles` (default 3 ≈ 125 ns at 24 MHz, ≥ the W5100's ~80 ns access).
DATA[7:0] is bidirectional → an SB_IO tristate (`bus_data_o`/`oe`/`i`).
- **Pins (15):** A[1:0]=2, D[7:0]=8, /CS,/RD,/WR=3, /INT=1, /RST=1. With EXI (5)
+ clk (1) = **21 of ~34 usable SG48 I/O** — comfortable. See `synth.py`.
- **MR.AI requires init first:** unlike the W5500 (each SPI transaction is
self-framed), the W5100's multi-byte accesses depend on MR.AI, so the init
sequence (triggered by the GC's NCRA reset) MUST run before any TX/RX. The
BBATop test issues NCRA-reset before its RX loop for this reason; on hardware
the GC driver already does. (`BBATop(reset_cycles=N)` shrinks the MR settle
wait for sim.)
- **Ring wraparound is in fabric:** the W5100 does NOT auto-wrap the IDM address
at the socket-buffer boundary (the W5500 did), so the streamer re-sets IDM_AR
to the buffer base when the running address reaches the 2 KB boundary. Handled
in the SW/SR/RB paths (`xfer_wrap`/`xfer_wbase`/`xfer_wend`/`cur_addr`); both
TX and RX wrap cases are tested.
- **Register map differs from the W5500:** common regs at 0x0000 (MR, SHAR 0x09,
IMR 0x16, RMSR/TMSR 0x1A/0x1B), socket 0 at 0x0400 (S0_MR/CR/IR, TX_WR 0x424,
RX_RSR 0x426, RX_RD 0x428), TX buffer 0x4000, RX buffer 0x6000. MACRAW mode.
- **Status:** init/TX/RX (with wrap) verified vs a bus model; BBATop full
W5100→SPRAM→GC RX loop passes byte-for-byte; synth PASS (slow ~32≥24, capture
~56≥54, 42% LC). Register addresses/MR bits are from the datasheet (from
memory) — **confirm at hardware bring-up**.
### `rebbarb/` — LED blink demo (unchanged)
- `rebbarb.py` — blinks LEDs via a PLL (36 MHz), demonstrates `IceBreakerPlatform`
- `debouncer.py` – `Debouncer(cycles)` — synchronous debounce, configurable hold
- `toggle_button.py` – `ToggleButton` — edge-to-toggle state machine (wraps Debouncer)
- `pulse_button.py` – `PulseButton` — single-cycle pulse on rising edge (wraps Debouncer)
These components are reusable building blocks. The `Debouncer` and button wrappers
will be needed for any physical input in `exi_bba/`.
**Import note:** `rebbarb/` files use bare imports (`from debouncer import Debouncer`).
Run them as `python rebbarb/<file>.py` from the workspace root so Python adds
`rebbarb/` to `sys.path` automatically.
**Simulation at module level:** `toggle_button.py` and `pulse_button.py` run
their simulations unconditionally (no `__main__` guard) — importing either file
triggers a VCD write. New modules should guard simulation code with
`if __name__ == "__main__":`.
`examples/amaranth_cdc.py` contains handwritten `SyncFF` and `TogglePulseSync`
reference implementations — use `amaranth.lib.cdc` primitives (`FFSynchronizer`,
`PulseSynchronizer`) in production code instead.
`hardware/sp1_test_plug/` — KiCad project for a physical SP1 edge-connector test
plug (schematic, PCB, custom GameCube symbol library). Used to verify pad geometry
before ordering the interposer PCB; not part of the FPGA build.
---
## Amaranth Simulator API
Two API generations are present in this repo:
| API | Where used | Status |
|---|---|---|
| `sim.add_testbench(async_fn)` + `await ctx.tick()` + `Period(MHz=n)` | `rebbarb/*.py` | **Use this for new code** |
| `sim.add_sync_process(gen_fn)` + `sim.run_until(t)` | `examples/` | Old — reference only |
New modules should use the testbench API (`add_testbench`, `sim.write_vcd(ctx)`
context manager). The old process API still works but is not idiomatic in current
Amaranth.
**Critical testbench timing rule:** `ctx.get(signal)` reads signal values AFTER
the clock edge (post-update registered values). Combinatorial signals that depend
on registered signals that were updated by the SAME tick will already reflect the
new registered values. For example: if `tx_sof = tx_bytes_r_rdy & is_first` and
`is_first` is cleared synchronously on the first byte, then reading `tx_sof` after
the first byte's tick always returns 0 — read BEFORE the tick instead.
**`ctx.set()` takes effect immediately** (combinatorial, not registered). Use it
AFTER `await ctx.tick()` to prepare inputs for the NEXT tick.
The full design specification lives in `docs/gc_bba_fpga_design.md`.
---
## Key Architecture Decisions
- **No network stack in the FPGA.** The GC CPU runs TCP/IP. The FPGA is a dumb
MAC bridge.
- **Split-domain clocking — 3 domains, 2 sources (1 PLL + 1 HFOSC):**
- `capture` — 54 MHz (PLL, DIVR=0 DIVF=71 DIVQ=4). Hosts ONLY the SPI Mode 3
bit engine inside `ExiCapture`. 54 MHz = 2× the **real 27 MHz** EXI clock —
the minimum oversampling for clean Mode 3. The isolated bit engine closes
~91 MHz; integrated with the byte-FIFO read path the capture domain closes
~62 MHz, so 54 passes with margin.
- `exi` — 24 MHz (HFOSC ÷2). BBA register file / transaction FSM.
- `sync` — 24 MHz (same HFOSC net as `exi`). SPRAM arbiter, RX/TX engines,
W5500 SPI master.
- **Why split:** only the tiny SPI bit engine needs a fast clock to sample
27 MHz EXI. The bulky register-file/SPRAM/W5500 logic is routing-bound at
~33–44 MHz on the UP5K and only needs the byte rate (27 MHz ÷ 8 ≈ 3.4 MHz).
`ExiCapture` bridges capture↔exi with rx/tx byte AsyncFIFOs.
- **EXI clock reality:** the GC EXI clock tops out at ~27 MHz. libogc's
`EXI_SPEED32MHZ` is a nominal name — the real rate is 27 MHz. The old
"96 MHz = 3× 32 MHz EXI" target was doubly wrong and unreachable on UP5K
(which caps ~44 MHz for non-trivial logic).
- **TX/MISO across the split:** the register file PROACTIVELY pushes read
responses into the tx byte FIFO during the EXI clock-idle gap (the GC pauses
the clock between an EXI_Imm header-write and the data-read). The bit engine
drives MISO live from the FIFO head; see `ExiCapture` / `SPIMode3Slave`.
- **All CDC via `amaranth.lib.cdc`.** Never pass raw multi-bit signals across
domains. Use `FFSynchronizer` for slow single bits, `PulseSynchronizer` for
events, `AsyncFIFO` for data streams, `ResetSynchronizer` for resets.
- **Register file lives entirely in `exi` domain.** The `sync` domain only
communicates through AsyncFIFOs and PulseSynchronizers — never direct register
reads/writes.
---
## Critical Protocol Notes
### EXI / SPI Mode 3
- CLK idles **HIGH** (CPOL=1, CPHA=1).
- MOSI sampled on **falling** CLK edge. MISO driven on **rising** CLK edge.
- Getting this wrong means the GC never enumerates the device.
- CS is active **low**, delineates each transaction.
### EXI Transaction Header (2 bytes before data)
```
Byte 0: [7]=write_flag [6:0]=addr[12:6]
Byte 1: [7:2]=addr[5:0] [1:0]=xfer_len-1 (0=1B … 3=4B)
```
Full address = 13 bits → 0x0000–0x1FFF.
### Device ID Query
On power-on the GC writes `0x0000` (2 bytes) then reads 4 bytes.
Must return: `0x04 0x02 0x02 0x00`.
---
## Memory Map (abridged)
| Range | Region |
|---|---|
| 0x0000–0x0033 | MAC control registers (register file, exi domain) |
| 0x0048 | TXDATA — bulk TX data port (→ `tx_bytes` AsyncFIFO) |
| 0x0100–0x0FFF | RX ring buffer in SPRAM (15 × 256-byte pages, pages 1–15) |
| 0x0100–0x1FFF | any read ≥ 0x0100 streams from SPRAM (DMA path); the ring proper is pages 1–15 above |
---
## Key Registers
| Addr | Name | Notes |
|---|---|---|
| 0x00 | NCRA | [0]=RESET self-clears; pulses `ncra_rst` to sync domain |
| 0x08 | IMR | Interrupt mask |
| 0x09 | IR | Write-1-to-clear. [1]=RI, [2]=TI. INT_N asserts when IR & IMR ≠ 0 |
| 0x16–17 | RWP | RX write pointer — updated by sync domain via `rx_wptr` FIFO |
| 0x18–19 | RRP | RX read pointer — GC writes after consuming frames |
| 0x20–25 | PAR0–5 | MAC address; also forwarded to W5500 as SHAR |
| 0x31 | NWAYS | Hardcode **0x17** (100M full-duplex link up, autoneg complete) |
| 0x3A | HIPR | Hardcode **0x01** (BBA present) |
| 0x48 | TXDATA | GC streams TX frame bytes here |
---
## Module Breakdown
| Module | Domain | File |
|---|---|---|
| `BBATop` | all | `exi_bba/bba_top.py` |
| `ExiCapture` | capture (+exi FIFOs) | `exi_bba/exi_capture.py` |
| `SPIMode3Slave` | capture (param `domain`) | `exi_bba/spi_mode3_slave.py` |
| `BBARegisterFile` | exi (+FIFO to sync) | `exi_bba/bba_register_file.py` |
| `SPRAMArbiter` | sync | `exi_bba/spram_arbiter.py` |
| `RXFrameAssembler` | sync | `exi_bba/rx_frame_assembler.py` |
| `TXFrameDrain` | sync | `exi_bba/tx_frame_drain.py` |
| `W5100ParallelMaster` | sync | `exi_bba/w5100_parallel_master.py` (default eth) |
| `W5500SPIMaster` | sync | `exi_bba/w5500_spi_master.py` (alt eth) |
| `EEPROMModel` | exi | `exi_bba/eeprom_model.py` |
`ExiCapture` wraps `SPIMode3Slave` (in the fast `capture` domain) plus the
capture↔exi rx/tx byte AsyncFIFOs. `BBARegisterFile` consumes the rx byte
stream and proactively pushes read responses into the tx byte FIFO — it no
longer sees the per-bit SPI cadence (that lives entirely in `capture`).
---
## CDC Signal Inventory
| Signal | Direction | Primitive |
|---|---|---|
| EXI CLK / MOSI / CS pins | async → capture | `FFSynchronizer` (stages=2) |
| RX byte stream (capture→core) | capture → exi | `AsyncFIFO` 8-bit, depth=4 |
| TX byte stream (core→capture) | exi → capture | `AsyncFIFO` 8-bit, depth=2 |
| cs_active (transaction in progress) | capture → exi | `FFSynchronizer` (DMA read length) |
| SPRAM read request (addr) | exi → sync | `AsyncFIFO` 16-bit, depth=4 |
| SPRAM read result (data) | sync → exi | `AsyncFIFO` 8-bit, depth=4 |
| TX packet bytes | exi → sync | `AsyncFIFO` 8-bit, depth=16 |
| TX frame length | exi → sync | `AsyncFIFO` 16-bit, depth=4 |
| RX frame bytes | sync → SPRAM | `RXFrameAssembler` → `SPRAMArbiter` (not a byte FIFO; the GC reads frames back out of SPRAM via the SPRAM read req/rsp FIFOs) |
| RWP update | sync → exi | `AsyncFIFO` 8-bit, depth=4 |
| RRP update | exi → sync | `AsyncFIFO` 8-bit, depth=4 |
| RX ready (IR[RI]) | sync → exi | `PulseSynchronizer` |
| TX done (IR[TI]) | sync → exi | `PulseSynchronizer` |
| NCRA reset pulse | exi → sync | `PulseSynchronizer` |
---
## W5500 Configuration (on NCRA reset)
The W5500 selects the register **block** via the BSB field of the control byte,
NOT via the address — so register addresses below are **block offsets**, not flat
0x4000-style addresses (see `_W5500_*` and `_CTRL_*` in `w5500_spi_master.py`).
```
1. Write MR = 0x80 (common block, offset 0x0000) software reset
2. Wait ~1 ms
3. Write SHAR = MAC (common block, offset 0x0009, 6 bytes from PAR0–5)
4. Write S0_MR = 0x04 (socket-0 reg block, offset 0x0000) MACRAW
5. Write S0_CR = 0x01 (socket-0 reg block, offset 0x0001) OPEN
6. Write S0_IMR = 0x05 (socket-0 reg block, offset 0x002C) RECV | SEND_OK
```
W5500 SPI is **Mode 0** (CPOL=0 CPHA=0); SCK = **12 MHz** (the 24 MHz `sync`
domain ÷ 2 via a toggle clock-enable). Connect W5500 `INT_N` to an FPGA input
for low-latency RX detection. (The W5500 is the alternate back-end; the W5100
parallel master is the default — see "W5100 vs W5500".)
---
## Physical Interface (SP1 Edge Connector)
- PCB must be **1.2 mm thick, ENIG finish**.
- Staggered (not mirrored) top/bottom contact rows — same geometry as PCI/ISA.
- Derive exact pad geometry from **SP1ETH KiCad project** (silverstee1/SP1ETH),
cross-referenced with ETH2SP1 (LaserBear). Do not rely on YAGCD alone.
- Add **100 µF bulk cap** on the interposer near FPGA power pins (3.3 V budget
is tight: iCEbreaker ~80 mA + W5500 ~150 mA ≈ 230 mA).
- **Pin 5 is 12 V — do not connect to FPGA I/O.** Test point or leave open.
- `EXTIN` (pin 1): tie to 3.3 V via 10 kΩ — required for GC device enumeration.
- All signal levels are 3.3 V. No level shifting needed.
---
## SPRAM Notes
- iCE40UP5K has 128 KB SPRAM (SB_SPRAM256KA, 16-bit wide).
- **1-cycle synchronous read latency** — result of read at cycle N is valid at N+1.
- Byte writes via `MASKWREN`: lower byte = `0b0011`, upper byte = `0b1100`.
- Address to SPRAM = byte_address >> 1.
- ETH writes take priority over EXI reads in the arbiter (safe by ring-buffer
invariant: GC only reads pages the ETH engine has already finished).
---
## GC Initialisation Sequence (Swiss/BBA driver)
```
1. Write 0x0000 × 2, read 4 B → must get 0x04020200 (device ID)
2. Write NCRA = 0x01 (reset, self-clears; resets W5500 + SPRAM ptrs)
3. Poll NCRA bit 0 until 0 (wait reset complete)
4. Write PAR0–5 (MAC address)
5. Write MAR0–7 = 0xFF (promiscuous multicast)
6. Write ANALOG = 0xD6 (enable PHY — no FPGA effect, just store)
7. Write NWAYC (autoneg config — store only)
8. Write IMR = 0x86 (enable RBFI | TI | RI interrupts)
9. Write GCA (AUTOPUB bit)
10. Write NCRA SR bit = 0x08 (start receive)
11. Poll NWAYS until link up → return hardcoded 0x17 immediately
```
---
## Implementation Notes & Gotchas
- **`NWAYS` must return `0x17` always.** GC polls it to confirm 100 Mbps link
before enabling RX. Do not attempt to reflect real W5500 link status.
- **`EEPROMModel` can be stubbed initially.** Many GC BBA drivers write their own
MAC to PAR0–5 rather than using the EEPROM. Pre-populate PAR0–5 reset state
with a valid Nintendo OUI MAC (`00:09:BF:xx:xx:xx`).
- **`tx_load` timing in `SPIMode3Slave`:** pulses at CS assertion (first byte)
and after each complete received byte. Upstream must register next TX byte
within one `exi` clock.
- **PLL target 54 MHz**: verify with `icepll -i 12 -o 54` (DIVR=0 DIVF=71 DIVQ=4)
before coding PLL parameters; the capture-domain bit engine oversamples the
27 MHz EXI clock 2×.
- **TX buffer selection (NCRA ST bits):** Ignore buffer select (ST1 vs ST0).
Treat any non-zero ST as a TX trigger.
- **If nextpnr fails capture-domain timing at 54 MHz:** the isolated bit engine
closes ~91 MHz, so 54 has margin; if a seed fails, sweep seeds
(`synth.py --seeds N`) or instruct users to configure Swiss to a lower EXI
clock index.
-195
View File
@@ -1,195 +0,0 @@
$comment Generated by Amaranth $end
$date 2025-09-20 22:27:02.816595 $end
$timescale 1 fs $end
$scope module bench $end
$scope module top $end
$var wire 1 ! clk $end
$var wire 1 " rst $end
$var wire 1 # i $end
$var wire 1 $ i$3 $end
$var wire 14 % counter $end
$var wire 1 & o $end
$var wire 1 ' o$6 $end
$var wire 1 ( last_seen $end
$scope module U$0 $end
$var wire 1 ! clk $end
$var wire 1 " rst $end
$var wire 1 # i $end
$var wire 1 ' o $end
$var wire 1 ) prevInValid $end
$var wire 14 * count $end
$var wire 1 + state $end
$var wire 1 , prevIn $end
$upscope $end
$upscope $end
$upscope $end
$enddefinitions $end
#0
$dumpvars
0!
0"
0#
0$
b0 %
0&
0'
0(
0)
b10011100010000 *
0+
0,
$end
#500000000
1!
1)
b0 *
#1000000000
0!
#1500000000
1!
1$
1#
#2000000000
0!
#2500000000
1!
1+
1,
b10011100010000 *
1'
#3000000000
0!
#3500000000
1!
1&
1(
#4000000000
0!
#4500000000
1!
0&
b10011100010000 %
#5000000000
0!
#5500000000
1!
b10011100001111 %
#6000000000
0!
#6500000000
1!
b10011100001110 %
0$
0#
#7000000000
0!
#7500000000
1!
0,
b10011100001111 *
b10011100001101 %
#8000000000
0!
#8500000000
1!
b10011100001110 *
b10011100001100 %
#9000000000
0!
#9500000000
1!
b10011100001101 *
b10011100001011 %
#10000000000
0!
#10500000000
1!
b10011100001100 *
b10011100001010 %
#11000000000
0!
#11500000000
1!
b10011100001011 *
b10011100001001 %
1$
1#
#12000000000
0!
#12500000000
1!
1,
b10011100010000 *
b10011100001000 %
#13000000000
0!
#13500000000
1!
b10011100000111 %
#14000000000
0!
#14500000000
1!
b10011100000110 %
#15000000000
0!
#15500000000
1!
b10011100000101 %
#16000000000
0!
#16500000000
1!
b10011100000100 %
0$
0#
#17000000000
0!
#17500000000
1!
0,
b10011100001111 *
b10011100000011 %
#18000000000
0!
#18500000000
1!
b10011100001110 *
b10011100000010 %
#19000000000
0!
#19500000000
1!
b10011100001101 *
b10011100000001 %
#20000000000
0!
#20500000000
1!
b10011100001100 *
b10011100000000 %
#21000000000
0!
#21500000000
1!
b10011100001011 *
b10011011111111 %
#22000000000
0!
#22500000000
1!
b10011100001010 *
b10011011111110 %
#23000000000
0!
#23500000000
1!
b10011100001001 *
b10011011111101 %
#24000000000
0!
#24500000000
1!
b10011100001000 *
b10011011111100 %
#25000000000
-171
View File
@@ -1,171 +0,0 @@
$comment Generated by Amaranth $end
$date 2025-09-20 22:27:02.809849 $end
$timescale 1 fs $end
$scope module bench $end
$scope module top $end
$var wire 1 ! clk $end
$var wire 1 " rst $end
$var wire 1 # i $end
$var wire 1 $ i$3 $end
$var wire 1 % o $end
$var wire 1 & last_seen $end
$var wire 1 ' o$6 $end
$scope module U$0 $end
$var wire 1 ! clk $end
$var wire 1 " rst $end
$var wire 1 # i $end
$var wire 1 % o $end
$var wire 1 ( prevInValid $end
$var wire 14 ) count $end
$var wire 1 * state $end
$var wire 1 + prevIn $end
$upscope $end
$upscope $end
$upscope $end
$enddefinitions $end
#0
$dumpvars
0!
0"
0#
0$
0%
0&
0'
0(
b10011100010000 )
0*
0+
$end
#500000000
1!
b0 )
1(
#1000000000
0!
#1500000000
1!
1$
1#
#2000000000
0!
#2500000000
1!
b10011100010000 )
1*
1+
1%
#3000000000
0!
#3500000000
1!
1&
1'
#4000000000
0!
#4500000000
1!
#5000000000
0!
#5500000000
1!
#6000000000
0!
#6500000000
1!
0$
0#
#7000000000
0!
#7500000000
1!
b10011100001111 )
0+
#8000000000
0!
#8500000000
1!
b10011100001110 )
#9000000000
0!
#9500000000
1!
b10011100001101 )
#10000000000
0!
#10500000000
1!
b10011100001100 )
#11000000000
0!
#11500000000
1!
b10011100001011 )
1$
1#
#12000000000
0!
#12500000000
1!
b10011100010000 )
1+
#13000000000
0!
#13500000000
1!
#14000000000
0!
#14500000000
1!
#15000000000
0!
#15500000000
1!
#16000000000
0!
#16500000000
1!
0$
0#
#17000000000
0!
#17500000000
1!
b10011100001111 )
0+
#18000000000
0!
#18500000000
1!
b10011100001110 )
#19000000000
0!
#19500000000
1!
b10011100001101 )
#20000000000
0!
#20500000000
1!
b10011100001100 )
#21000000000
0!
#21500000000
1!
b10011100001011 )
#22000000000
0!
#22500000000
1!
b10011100001010 )
#23000000000
0!
#23500000000
1!
b10011100001001 )
#24000000000
0!
#24500000000
1!
b10011100001000 )
#25000000000
+1
View File
@@ -0,0 +1 @@
{}
+1
View File
@@ -0,0 +1 @@
{}
+33
View File
@@ -0,0 +1,33 @@
{
"file-explorer": true,
"global-search": true,
"switcher": true,
"graph": true,
"backlink": true,
"canvas": true,
"outgoing-link": true,
"tag-pane": true,
"footnotes": false,
"properties": false,
"page-preview": true,
"daily-notes": true,
"templates": true,
"note-composer": true,
"command-palette": true,
"slash-command": false,
"editor-status": true,
"bookmarks": true,
"markdown-importer": false,
"zk-prefixer": false,
"random-note": false,
"outline": true,
"word-count": true,
"slides": false,
"audio-recorder": false,
"workspaces": false,
"file-recovery": true,
"publish": false,
"sync": true,
"bases": true,
"webviewer": false
}
+167
View File
@@ -0,0 +1,167 @@
{
"main": {
"id": "6eef6b982305e97c",
"type": "split",
"children": [
{
"id": "ef28aa54abb02b7c",
"type": "tabs",
"children": [
{
"id": "dd2aafdfa4873c3e",
"type": "leaf",
"state": {
"type": "empty",
"state": {},
"icon": "lucide-file",
"title": "New tab"
}
}
]
}
],
"direction": "vertical"
},
"left": {
"id": "7dcb0dd958c47669",
"type": "split",
"children": [
{
"id": "5addbd6c8b989a49",
"type": "tabs",
"children": [
{
"id": "10f89da0d72538c0",
"type": "leaf",
"state": {
"type": "file-explorer",
"state": {
"sortOrder": "alphabetical",
"autoReveal": false
},
"icon": "lucide-folder-closed",
"title": "Files"
}
},
{
"id": "476834a62536c756",
"type": "leaf",
"state": {
"type": "search",
"state": {
"query": "",
"matchingCase": false,
"explainSearch": false,
"collapseAll": false,
"extraContext": false,
"sortOrder": "alphabetical"
},
"icon": "lucide-search",
"title": "Search"
}
},
{
"id": "ce54c42efc557a72",
"type": "leaf",
"state": {
"type": "bookmarks",
"state": {},
"icon": "lucide-bookmark",
"title": "Bookmarks"
}
}
]
}
],
"direction": "horizontal",
"width": 300
},
"right": {
"id": "87b1d8f1ca08108d",
"type": "split",
"children": [
{
"id": "69cbc257ba71f388",
"type": "tabs",
"children": [
{
"id": "739632e6a61f8d8e",
"type": "leaf",
"state": {
"type": "backlink",
"state": {
"collapseAll": false,
"extraContext": false,
"sortOrder": "alphabetical",
"showSearch": false,
"searchQuery": "",
"backlinkCollapsed": false,
"unlinkedCollapsed": true
},
"icon": "links-coming-in",
"title": "Backlinks"
}
},
{
"id": "e20c6e67aeb6eacb",
"type": "leaf",
"state": {
"type": "outgoing-link",
"state": {
"linksCollapsed": false,
"unlinkedCollapsed": true
},
"icon": "links-going-out",
"title": "Outgoing links"
}
},
{
"id": "858ad7c8f3ac4d90",
"type": "leaf",
"state": {
"type": "tag",
"state": {
"sortOrder": "frequency",
"useHierarchy": true,
"showSearch": false,
"searchQuery": ""
},
"icon": "lucide-tags",
"title": "Tags"
}
},
{
"id": "661ea018f1aa1171",
"type": "leaf",
"state": {
"type": "outline",
"state": {
"followCursor": false,
"showSearch": false,
"searchQuery": ""
},
"icon": "lucide-list",
"title": "Outline"
}
}
]
}
],
"direction": "horizontal",
"width": 300,
"collapsed": true
},
"left-ribbon": {
"hiddenItems": {
"switcher:Open quick switcher": false,
"graph:Open graph view": false,
"canvas:Create new canvas": false,
"daily-notes:Open today's daily note": false,
"templates:Insert template": false,
"command-palette:Open command palette": false,
"bases:Create new base": false
}
},
"active": "dd2aafdfa4873c3e",
"lastOpenFiles": []
}
-24
View File
@@ -1,24 +0,0 @@
This project attempts to emulate the GameCube Broadband Adapter in an FPGA. The following things need to happen:
- [x] [[Amaranth-Hdl project setup]]
- [x] Setup venv
- [x] Install packages
- [x] Flash Blinky on icebreaker
- [ ] Figuring out how to deal with [[external clocks]].
- [x] How to get a clock faster than 12 MHz, as needed to interface with the 32 MHz EXI bus
- [x] PLL configured to 48 MHz
- [ ] ~~48 MHz oscillator onboard?~~
- [ ] Check if Clock Domain Crossing is possible.
- [ ] Oversampling approach was tedious but worked
- [ ] Interfacing with [[GameCube]]
- [ ] Figuring out the pinout of SP1.
- [ ] Unofficial GameCube docs?
- [ ] Make sure connecting [[SP1]] to IceBreaker is safe.
- [ ] Can we power the FPGA with the SP1?
- [ ] How much voltage do we get from SP1?
- [ ] How much current can we source?
- [ ] Figuring out basic [[EXI protocol]]
- [ ] What is the structure of the messages?
- [ ] How to know how long the message is?
- [ ] Integrity checks?
- [ ] How fast do we need to respond to a message?
File diff suppressed because it is too large Load Diff
+107
View File
@@ -0,0 +1,107 @@
from amaranth import *
from amaranth.sim import Simulator
class SyncFF(Elaboratable):
    """Multi-flop level synchronizer, `width` bits wide, from `src_domain` to `dst_domain`.

    The input is first registered in the source domain and then passed through
    two flip-flops clocked by the destination domain.

    Intended for level signals that stay stable for several destination
    cycles. Single-cycle pulses should go through TogglePulseSync instead.

    Ports:
        src: value to transfer (driven by source-domain logic).
        dst: synchronized copy of `src` (valid in the destination domain).
    """

    def __init__(self, width=1, src_domain="src", dst_domain="dst"):
        self.width = width
        self.src_domain = src_domain
        self.dst_domain = dst_domain

        self.src = Signal(self.width)
        self.dst = Signal(self.width)

    def elaborate(self, platform):
        m = Module()

        # Explicit names keep the generated netlist / waveform identifiers
        # independent of the local variable names used here.
        reg_src = Signal(self.width, name="reg_src")
        ff0 = Signal(self.width, name="ff0")
        ff1 = Signal(self.width, name="ff1")

        # Launch flop in the source domain, so the crossing starts from a
        # registered (glitch-free) value.
        m.d[self.src_domain] += reg_src.eq(self.src)

        # Two capture flops in the destination domain.
        m.d[self.dst_domain] += [
            ff0.eq(reg_src),
            ff1.eq(ff0),
        ]

        m.d.comb += self.dst.eq(ff1)
        return m
class TogglePulseSync(Elaboratable):
    """Pulse transfer from `src_domain` into `dst_domain` using a toggle flop.

    - The source domain flips an internal `toggle` register on every cycle in
      which `src_pulse` is high.
    - The destination domain passes `toggle` through two flip-flops and
      raises `dst_pulse` for one destination cycle each time the synchronized
      value changes.

    Because only *changes* of the toggle are observed, two source pulses that
    both land between consecutive destination samples cancel each other out.
    Source pulses must therefore be spaced far enough apart for the
    destination clock to sample every toggle.

    Ports:
        src_pulse: event strobe, sampled in the source domain.
        dst_pulse: one-cycle strobe in the destination domain.
    """

    def __init__(self, src_domain="src", dst_domain="dst"):
        self.src_domain = src_domain
        self.dst_domain = dst_domain

        self.src_pulse = Signal()
        self.dst_pulse = Signal()

    def elaborate(self, platform):
        m = Module()

        toggle = Signal()
        sync0 = Signal()
        sync1 = Signal()
        prev = Signal()
        edge = Signal()

        # Source domain: flip the toggle when a pulse arrives. Amaranth
        # expresses conditionals with the `m.If` context manager; there is no
        # Migen-style `If(cond, stmt)` callable.
        with m.If(self.src_pulse):
            m.d[self.src_domain] += toggle.eq(~toggle)

        m.d[self.dst_domain] += [
            # Two-flop synchronizer for the toggle level.
            sync0.eq(toggle),
            sync1.eq(sync0),
            # Registered change detector: `edge` is high for one cycle after
            # `sync1` differs from its previous value.
            edge.eq(sync1 ^ prev),
            prev.eq(sync1),
        ]

        m.d.comb += self.dst_pulse.eq(edge)
        return m
def _sim_toggle_pulse():
"""Simple simulation that drives pulses on the source domain and prints detections on the destination domain."""
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loops inside process() cannot be read off the text — confirm against the
# original file before relying on the comments below.
# Wrap the synchronizer in an otherwise empty top-level module.
top = Module()
t = TogglePulseSync(src_domain="src", dst_domain="dst")
top.submodules.t = t
sim = Simulator(top)
# Create two asynchronous clocks (periods chosen arbitrarily for the sim)
# The first argument is the clock period in seconds: 1 us for "src", 1.5 us for "dst".
sim.add_clock(1e-6, domain="src")
sim.add_clock(1.5e-6, domain="dst")
# Stimulus generator. It is registered on the "src" domain below, so each bare
# `yield` waits for one source-clock tick.
def process():
# Wait a little, then generate three source pulses at different phases
for _ in range(5):
yield
# The loop index `i` is not used in the body.
for i in range(3):
# Raise src_pulse, let one tick pass, then lower it again.
yield t.src_pulse.eq(1)
yield
yield t.src_pulse.eq(0)
# let the domains run for a few cycles
# presumably nested inside the 3-pulse loop (a pause after each pulse) — TODO confirm
for _ in range(10):
# dst_pulse is sampled from this source-domain process.
dp = (yield t.dst_pulse)
if dp:
# The message is a constant string; it carries no timestamp.
print(f"dst detected pulse at sim tick")
yield
# run a bit longer to observe behavior
for _ in range(20):
yield
sim.add_sync_process(process, domain="src")
# Stop after 100 us of simulated time.
sim.run_until(100e-6)
# Run the demo simulation when this file is executed as a script.
if __name__ == "__main__":
_sim_toggle_pulse()
+182
View File
@@ -0,0 +1,182 @@
from amaranth import *
from amaranth.sim import Simulator
def bin_to_gray(x):
    """Return the reflected-binary Gray code of `x`.

    Each Gray bit is the XOR of the corresponding binary bit with the next
    more significant one, which is `x XOR (x >> 1)`.
    """
    shifted = x >> 1
    return x ^ shifted
def gray_to_bin(g, width):
    """Convert the low `width` bits of Gray-coded `g` back to plain binary.

    Walks from the most significant bit down. Every binary bit is the XOR of
    all Gray bits at or above its position, kept here as a running parity.
    Bits of `g` at positions >= `width` are ignored; a `width` of zero or
    less yields 0.
    """
    result = 0
    parity = 0  # XOR of the Gray bits seen so far (from the MSB downwards)
    for pos in reversed(range(width)):
        parity ^= (g >> pos) & 1
        result |= parity << pos
    return result
class AsyncFIFO(Elaboratable):
    """Parameterizable gray-pointer dual-clock FIFO.

    - width: data width in bits
    - depth: must be a power of two (at least 2)
    - wdomain: write (source) domain name
    - rdomain: read (destination) domain name

    Write side (`wdomain`):
        wdata / w_en : a word is stored on every cycle where `w_en` is high
                       and `w_full` is low.
        w_full       : high when all `depth` slots are occupied, as seen
                       through the two-flop synchronized read pointer.

    Read side (`rdomain`):
        r_en         : pops one word when high while `r_empty` is low.
        r_valid      : high for one cycle, the cycle after an accepted pop.
        rdata        : the popped word; meaningful while `r_valid` is high.
        r_empty      : high when no word is visible to the read side, as seen
                       through the two-flop synchronized write pointer.
    """

    def __init__(self, width=1, depth=16, wdomain="src", rdomain="dst"):
        # The full test flips the top two pointer bits, so the pointer must
        # be at least two bits wide (depth >= 2).
        assert depth >= 2, "depth must be at least 2"
        assert depth & (depth - 1) == 0
        self.width = width
        self.depth = depth
        self.aw = (depth - 1).bit_length()  # address width
        self.wdomain = wdomain
        self.rdomain = rdomain

        # write-side interface
        self.wdata = Signal(width)
        self.w_en = Signal()
        self.w_full = Signal()

        # read-side interface
        self.rdata = Signal(width)
        self.r_en = Signal()
        self.r_valid = Signal()
        self.r_empty = Signal()

    def elaborate(self, platform):
        m = Module()

        mem = Memory(width=self.width, depth=self.depth)
        wp = mem.write_port(domain=self.wdomain)
        rp = mem.read_port(domain=self.rdomain, transparent=False)
        m.submodules += wp, rp

        # Pointers are aw+1 bits: the extra MSB distinguishes "full" from
        # "empty" when the address bits are equal.
        ptr_width = self.aw + 1
        wbin = Signal(ptr_width)
        wgray = Signal(ptr_width)
        rbin = Signal(ptr_width)
        rgray = Signal(ptr_width)

        # Two-stage synchronizers for the opposing domain's gray pointer.
        rgray_sync0 = Signal(ptr_width)
        rgray_sync1 = Signal(ptr_width)
        wgray_sync0 = Signal(ptr_width)
        wgray_sync1 = Signal(ptr_width)

        # ── write domain ────────────────────────────────────────────────
        m.d[self.wdomain] += [
            rgray_sync0.eq(rgray),
            rgray_sync1.eq(rgray_sync0),
        ]

        # Full when the write pointer is exactly `depth` ahead of the read
        # pointer. In gray code that is the read pointer with its top two
        # bits inverted. The test uses the CURRENT write pointer so that
        # `w_full` does not depend on `w_en` and the last slot stays usable.
        full_mask = 0b11 << (self.aw - 1)
        m.d.comb += self.w_full.eq(wgray == (rgray_sync1 ^ full_mask))

        do_write = Signal()
        next_wbin = Signal(ptr_width)
        m.d.comb += [
            do_write.eq(self.w_en & ~self.w_full),
            next_wbin.eq(wbin + 1),
            # The port is driven combinationally so the word is stored on the
            # same clock edge that advances the pointer. Python slices are
            # [low:high), so the address bits are wbin[:aw].
            wp.addr.eq(wbin[:self.aw]),
            wp.data.eq(self.wdata),
            wp.en.eq(do_write),
        ]
        with m.If(do_write):
            m.d[self.wdomain] += [
                wbin.eq(next_wbin),
                wgray.eq(next_wbin ^ (next_wbin >> 1)),
            ]

        # ── read domain ─────────────────────────────────────────────────
        m.d[self.rdomain] += [
            wgray_sync0.eq(wgray),
            wgray_sync1.eq(wgray_sync0),
        ]

        do_read = Signal()
        next_rbin = Signal(ptr_width)
        m.d.comb += [
            # Empty when the read pointer has caught up with the write
            # pointer as seen in this domain.
            self.r_empty.eq(rgray == wgray_sync1),
            do_read.eq(self.r_en & ~self.r_empty),
            next_rbin.eq(rbin + 1),
            rp.addr.eq(rbin[:self.aw]),
            rp.en.eq(do_read),
            # The synchronous read port delivers the word one cycle after the
            # address is presented, which is exactly when r_valid is high.
            self.rdata.eq(rp.data),
        ]
        m.d[self.rdomain] += self.r_valid.eq(do_read)
        with m.If(do_read):
            m.d[self.rdomain] += [
                rbin.eq(next_rbin),
                rgray.eq(next_rbin ^ (next_rbin >> 1)),
            ]

        return m
# Demo simulation: one process writes bits into the FIFO from the "src" domain
# while another reads them back from the "dst" domain and prints them.
# NOTE(review): indentation was lost in this listing, so loop nesting and the
# if/else pairing inside reader() cannot be read off the text — confirm
# against the original file.
def _sim_fifo():
top = Module()
fifo = AsyncFIFO(width=1, depth=16, wdomain="src", rdomain="dst")
top.submodules.fifo = fifo
sim = Simulator(top)
# The first argument is the clock period in seconds: 1 us for "src", 1.7 us for "dst".
sim.add_clock(1e-6, domain="src")
sim.add_clock(1.7e-6, domain="dst")
def writer():
# write 32 single-bit words; the value is i & 1, i.e. alternating 0, 1, 0, 1, ...
for i in range(32):
yield fifo.wdata.eq(i & 1)
yield fifo.w_en.eq(1)
yield
yield fifo.w_en.eq(0)
# allow some idle cycles
# (0, 1 or 2 extra ticks, depending on i % 3)
for _ in range((i % 3)):
yield
def reader():
# Values collected while r_valid was observed high.
seen = []
for _ in range(200):
# try to consume if not empty
empty = (yield fifo.r_empty)
if not empty:
# Pulse r_en across one tick, then check whether the FIFO flagged a valid word.
yield fifo.r_en.eq(1)
yield
yield fifo.r_en.eq(0)
if (yield fifo.r_valid):
d = (yield fifo.rdata)
seen.append(d)
print(f"read: {d}")
# presumably pairs with `if not empty` (idle for one tick when the FIFO is empty) — TODO confirm
else:
yield
print(f"total read: {len(seen)}")
# The writer is clocked by "src" and the reader by "dst".
sim.add_sync_process(writer, domain="src")
sim.add_sync_process(reader, domain="dst")
sim.run()
# Run the demo simulation when this file is executed as a script.
if __name__ == "__main__":
_sim_fifo()
+119
View File
@@ -0,0 +1,119 @@
"""IceBreaker (iCE40 UP5K) vendor-backed async FIFO example.
This module uses Amaranth's `Memory` with separate write/read ports in different
clock domains. With the icestorm toolchain the memory typically maps to
`SB_RAM40_4K` block RAMs. The control (full/empty) is implemented with
gray-pointer logic and two-stage synchronization of opposing pointers.
Notes:
- This prefers block RAM for storage (small LUT usage, lower power).
- The write/read ports are in independent domains; backend maps ports to
dual-port RAM primitives when available.
"""
from amaranth import *
class Ice40AsyncFIFO(Elaboratable):
    """One-bit-wide dual-clock FIFO with gray-coded pointers.

    Parameters:
        depth:   number of entries; must be a power of two (at least 2).
        wdomain: name of the write clock domain.
        rdomain: name of the read clock domain.

    Write side (`wdomain`):
        wdata / w_en : a bit is stored on every cycle where `w_en` is high
                       and `w_full` is low.
        w_full       : high when all `depth` slots are occupied, as seen
                       through the two-flop synchronized read pointer.

    Read side (`rdomain`):
        r_en         : pops one bit when high while `r_empty` is low.
        r_valid      : high for one cycle, the cycle after an accepted pop.
        rdata        : the popped bit; meaningful while `r_valid` is high.
        r_empty      : high when no bit is visible to the read side, as seen
                       through the two-flop synchronized write pointer.
    """

    def __init__(self, depth=256, wdomain="src", rdomain="dst"):
        # The full test flips the top two pointer bits, so the pointer must
        # be at least two bits wide (depth >= 2).
        assert depth >= 2, "depth must be at least 2"
        assert depth & (depth - 1) == 0, "depth must be power of two"
        self.depth = depth
        self.aw = (depth - 1).bit_length()
        self.wdomain = wdomain
        self.rdomain = rdomain

        # serial (1-bit) interface
        self.wdata = Signal()
        self.w_en = Signal()
        self.w_full = Signal()

        self.rdata = Signal()
        self.r_en = Signal()
        self.r_valid = Signal()
        self.r_empty = Signal()

    def elaborate(self, platform):
        m = Module()

        # Single-bit-wide memory with one write port and one synchronous,
        # non-transparent read port, each in its own clock domain.
        mem = Memory(width=1, depth=self.depth)
        wp = mem.write_port(domain=self.wdomain)
        rp = mem.read_port(domain=self.rdomain, transparent=False)
        m.submodules += wp, rp

        # Pointers are aw+1 bits: the extra MSB distinguishes "full" from
        # "empty" when the address bits are equal.
        ptr_width = self.aw + 1
        wbin = Signal(ptr_width)
        wgray = Signal(ptr_width)
        rbin = Signal(ptr_width)
        rgray = Signal(ptr_width)

        # Two-stage synchronizers for the opposing domain's gray pointer.
        rgray_sync0 = Signal(ptr_width)
        rgray_sync1 = Signal(ptr_width)
        wgray_sync0 = Signal(ptr_width)
        wgray_sync1 = Signal(ptr_width)

        # ── write side ──────────────────────────────────────────────────
        m.d[self.wdomain] += [
            rgray_sync0.eq(rgray),
            rgray_sync1.eq(rgray_sync0),
        ]

        # Full when the write pointer is exactly `depth` ahead of the read
        # pointer. In gray code that is the read pointer with its top two
        # bits inverted. The test uses the CURRENT write pointer so that
        # `w_full` does not depend on `w_en` and the last slot stays usable.
        full_mask = 0b11 << (self.aw - 1)
        m.d.comb += self.w_full.eq(wgray == (rgray_sync1 ^ full_mask))

        do_write = Signal()
        next_wbin = Signal(ptr_width)
        m.d.comb += [
            do_write.eq(self.w_en & ~self.w_full),
            next_wbin.eq(wbin + 1),
            # The port is driven combinationally so the bit is stored on the
            # same clock edge that advances the pointer. Python slices are
            # [low:high), so the address bits are wbin[:aw].
            wp.addr.eq(wbin[:self.aw]),
            wp.data.eq(self.wdata),
            wp.en.eq(do_write),
        ]
        with m.If(do_write):
            m.d[self.wdomain] += [
                wbin.eq(next_wbin),
                wgray.eq(next_wbin ^ (next_wbin >> 1)),
            ]

        # ── read side ───────────────────────────────────────────────────
        m.d[self.rdomain] += [
            wgray_sync0.eq(wgray),
            wgray_sync1.eq(wgray_sync0),
        ]

        do_read = Signal()
        next_rbin = Signal(ptr_width)
        m.d.comb += [
            # Empty when the read pointer has caught up with the write
            # pointer as seen in this domain.
            self.r_empty.eq(rgray == wgray_sync1),
            do_read.eq(self.r_en & ~self.r_empty),
            next_rbin.eq(rbin + 1),
            rp.addr.eq(rbin[:self.aw]),
            rp.en.eq(do_read),
            # The synchronous read port delivers the bit one cycle after the
            # address is presented, which is exactly when r_valid is high.
            self.rdata.eq(rp.data),
        ]
        m.d[self.rdomain] += self.r_valid.eq(do_read)
        with m.If(do_read):
            m.d[self.rdomain] += [
                rbin.eq(next_rbin),
                rgray.eq(next_rbin ^ (next_rbin >> 1)),
            ]

        return m
if __name__ == "__main__":
    # Smoke check: build a 256-entry FIFO and print the Verilog generated for
    # it, with the FIFO's interface signals as the module ports.
    from amaranth.back import verilog

    fifo = Ice40AsyncFIFO(depth=256)
    port_list = [
        fifo.wdata, fifo.w_en, fifo.w_full,
        fifo.rdata, fifo.r_en, fifo.r_valid, fifo.r_empty,
    ]
    print(verilog.convert(fifo, ports=port_list))
View File
+617
View File
@@ -0,0 +1,617 @@
"""BBA register file — EXI domain.
Decodes EXI transactions (2-byte header + N data bytes), reads/writes the BBA
register space, and owns all AsyncFIFO / PulseSynchronizer CDC primitives.
Transaction header format
--------------------------
Byte 0 [7] write_flag
[6:0] addr[12:6]
Byte 1 [7:2] addr[5:0]
[1:0] xfer_len-1 (0=1B, 1=2B, 2=3B, 3=4B)
Addresses 0x0000-0x00FF : register file (sparse individual Signals, exi domain).
Addresses 0x0100-0x1FFF : SPRAM ring buffer (sync domain, prefetch FIFOs).
"""
from amaranth import *
from amaranth.lib.cdc import PulseSynchronizer
from amaranth.lib.fifo import AsyncFIFO
__all__ = ["BBARegisterFile"]
# Register addresses
# BBARegisterFile matches these against the low byte of the running EXI
# address (eff_addr[0:8]).
_NCRA = 0x00  # control: bit 0 = reset (self-clearing), bits 1-2 start a transmit, bit 3 = start receive
_IMR = 0x08  # interrupt mask; ANDed with IR to drive the interrupt output
_IR = 0x09  # interrupt status; write-1-to-clear
_RWP_LO = 0x16  # RX write pointer, low byte (updated from the sync domain)
_RWP_HI = 0x17  # not referenced by BBARegisterFile in this file
_RRP_LO = 0x18  # RX read pointer, low byte (forwarded to the sync domain on write)
_RRP_HI = 0x19  # not referenced by BBARegisterFile in this file
# PAR0..PAR5: the six MAC-address bytes, PAR0 being the first octet.
_PAR0 = 0x20
_PAR1 = 0x21
_PAR2 = 0x22
_PAR3 = 0x23
_PAR4 = 0x24
_PAR5 = 0x25
_NWAYS = 0x31  # read-only, always returns _NWAYS_VAL
_HIPR = 0x3A  # read-only, always returns _HIPR_VAL (listed out of numeric order)
_TWD_LO = 0x34  # transmit frame length, low byte
_TWD_HI = 0x35  # transmit frame length, high byte
_TXDATA = 0x48  # bytes written here are pushed into the TX byte FIFO
# Read-only hardcoded values
_NWAYS_VAL = 0x17
_HIPR_VAL = 0x01
# Device ID returned on first 4-byte read of addr 0x0000
# (returned until the first NCRA reset write clears id_phase)
_DEVICE_ID = [0x04, 0x02, 0x02, 0x00]
class BBARegisterFile(Elaboratable):
    """EXI transaction decoder and BBA register file with CDC bridges.

    Runs in the "exi" clock domain. It consumes the received byte stream
    (2-byte header followed by data or dummy bytes), serves register reads
    and writes for addresses 0x0000-0x00FF, and streams SPRAM contents for
    reads at 0x0100 and above. All AsyncFIFO / PulseSynchronizer instances
    that cross between the "exi" and "sync" domains are instantiated here.

    Sync-domain FIFO/pulse ports are wired by BBATop to the sync-domain modules.
    """

    def __init__(self):
        # ── EXI byte-stream interface (exi domain, from/to ExiCapture) ────
        # RX: received bytes (header + write data + read dummies) — FWFT read
        # side of ExiCapture's rx_fifo.
        self.rx_data = Signal(8)
        self.rx_rdy = Signal()
        self.rx_en = Signal()
        # TX: response bytes pushed proactively into ExiCapture's tx_fifo.
        self.tx_data = Signal(8)
        self.tx_en = Signal()
        self.tx_rdy = Signal()
        # High while an EXI transaction is in progress (from ExiCapture).
        # SPRAM reads stream until this deasserts → supports variable-length
        # (DMA) bulk reads, not just ≤4-byte immediate transfers.
        self.cs_active = Signal()

        # ── Interrupt (exi domain) ────────────────────────────────────────
        # Active low: driven low while any unmasked IR bit is set.
        self.exi_int_n = Signal(init=1)

        # ── PAR output (for forwarding to W5500 as source MAC) ───────────
        self.par = Signal(48)  # PAR0-5 packed: PAR0 in low byte par[0:8]
        # NCRA[3] = SR (start receive) bit — gates the RX ring-buffer path.
        self.ncra_sr = Signal()

        # ── CDC FIFO sync-domain sides (wired by BBATop) ──────────────────
        # SPRAM request exi→sync: sync reads these
        self.spram_req_r_data = Signal(16)
        self.spram_req_r_en = Signal()
        self.spram_req_r_rdy = Signal()
        # SPRAM response sync→exi: sync writes these
        self.spram_rsp_w_data = Signal(8)
        self.spram_rsp_w_en = Signal()
        self.spram_rsp_w_rdy = Signal()
        # TX bytes exi→sync: sync reads these
        self.tx_bytes_r_data = Signal(8)
        self.tx_bytes_r_en = Signal()
        self.tx_bytes_r_rdy = Signal()
        # TX ctrl (frame length) exi→sync: sync reads these
        self.tx_ctrl_r_data = Signal(16)
        self.tx_ctrl_r_en = Signal()
        self.tx_ctrl_r_rdy = Signal()
        # RX write-pointer update sync→exi: sync writes these
        self.rx_wptr_w_data = Signal(8)
        self.rx_wptr_w_en = Signal()
        self.rx_wptr_w_rdy = Signal()
        # RX read-pointer update exi→sync: sync reads these
        self.rx_rptr_r_data = Signal(8)
        self.rx_rptr_r_en = Signal()
        self.rx_rptr_r_rdy = Signal()

        # PulseSynchronizer ports (exi↔sync)
        self.ncra_rst_o = Signal()  # exi→sync
        self.rx_irq_i = Signal()  # sync→exi
        self.tx_irq_i = Signal()  # sync→exi

    def elaborate(self, platform):
        m = Module()

        # ── CDC FIFOs ────────────────────────────────────────────────────
        spram_req = AsyncFIFO(width=16, depth=4, w_domain="exi", r_domain="sync")
        spram_rsp = AsyncFIFO(width=8, depth=4, w_domain="sync", r_domain="exi")
        tx_bytes = AsyncFIFO(width=8, depth=16, w_domain="exi", r_domain="sync")
        tx_ctrl = AsyncFIFO(width=16, depth=4, w_domain="exi", r_domain="sync")
        rx_wptr = AsyncFIFO(width=8, depth=4, w_domain="sync", r_domain="exi")
        rx_rptr = AsyncFIFO(width=8, depth=4, w_domain="exi", r_domain="sync")

        m.submodules.spram_req = spram_req
        m.submodules.spram_rsp = spram_rsp
        m.submodules.tx_bytes = tx_bytes
        m.submodules.tx_ctrl = tx_ctrl
        m.submodules.rx_wptr = rx_wptr
        m.submodules.rx_rptr = rx_rptr

        # Expose sync-domain FIFO sides
        m.d.comb += [
            self.spram_req_r_data.eq(spram_req.r_data),
            spram_req.r_en.eq(self.spram_req_r_en),
            self.spram_req_r_rdy.eq(spram_req.r_rdy),

            spram_rsp.w_data.eq(self.spram_rsp_w_data),
            spram_rsp.w_en.eq(self.spram_rsp_w_en),
            self.spram_rsp_w_rdy.eq(spram_rsp.w_rdy),

            self.tx_bytes_r_data.eq(tx_bytes.r_data),
            tx_bytes.r_en.eq(self.tx_bytes_r_en),
            self.tx_bytes_r_rdy.eq(tx_bytes.r_rdy),

            self.tx_ctrl_r_data.eq(tx_ctrl.r_data),
            tx_ctrl.r_en.eq(self.tx_ctrl_r_en),
            self.tx_ctrl_r_rdy.eq(tx_ctrl.r_rdy),

            rx_wptr.w_data.eq(self.rx_wptr_w_data),
            rx_wptr.w_en.eq(self.rx_wptr_w_en),
            self.rx_wptr_w_rdy.eq(rx_wptr.w_rdy),

            self.rx_rptr_r_data.eq(rx_rptr.r_data),
            rx_rptr.r_en.eq(self.rx_rptr_r_en),
            self.rx_rptr_r_rdy.eq(rx_rptr.r_rdy),
        ]

        # ── PulseSynchronizers ───────────────────────────────────────────
        ncra_rst_ps = PulseSynchronizer(i_domain="exi", o_domain="sync")
        rx_irq_ps = PulseSynchronizer(i_domain="sync", o_domain="exi")
        tx_irq_ps = PulseSynchronizer(i_domain="sync", o_domain="exi")
        m.submodules.ncra_rst_ps = ncra_rst_ps
        m.submodules.rx_irq_ps = rx_irq_ps
        m.submodules.tx_irq_ps = tx_irq_ps

        m.d.comb += [
            self.ncra_rst_o.eq(ncra_rst_ps.o),
            rx_irq_ps.i.eq(self.rx_irq_i),
            tx_irq_ps.i.eq(self.tx_irq_i),
        ]

        # ── Register file (sparse individual Signals, exi domain) ────────
        # Only the registers actually read/written by the GC or sync domain.
        # Writes to unknown addresses are silently ignored; reads return 0.
        r_ncra = Signal(8)
        r_imr = Signal(8)
        r_ir = Signal(8)
        r_rwp_lo = Signal(8)
        r_rrp_lo = Signal(8)
        # PAR0-5 reset to a valid Nintendo OUI MAC (00:09:BF:00:00:01) so the
        # device has a sane source MAC even before the GC driver programs its
        # own. PAR0 is the first MAC octet.
        _par_reset = [0x00, 0x09, 0xBF, 0x00, 0x00, 0x01]
        r_par = Array([Signal(8, name=f"par{i}", init=_par_reset[i])
                       for i in range(6)])
        r_twd_lo = Signal(8)
        r_twd_hi = Signal(8)

        # PAR packed output: PAR0 in the LOW byte (par[0:8]). The W5500 master
        # reads mac_shadow[i] = par[i*8:(i+1)*8], so this puts PAR0 first in the
        # SHAR write — i.e. PAR0 is the first MAC octet on the wire.
        m.d.comb += self.par.eq(Cat(
            r_par[0], r_par[1], r_par[2], r_par[3], r_par[4], r_par[5],
        ))
        m.d.comb += self.ncra_sr.eq(r_ncra[3])  # start-receive bit

        # ── Transaction state ────────────────────────────────────────────
        hdr0 = Signal(8)
        addr = Signal(13)
        is_write = Signal()
        xfer_len = Signal(2)  # 0=1B … 3=4B
        byte_ctr = Signal(2)
        tx_frame_len = Signal(16)
        # True until first NCRA reset write: return device ID on addr=0 reads
        id_phase = Signal(init=1)

        # Per-byte SPRAM read handshake (register-read path): sp_req marks a
        # request in flight; drain_ctr counts the read-phase dummy bytes.
        sp_req = Signal()
        drain_ctr = Signal(2)

        # SPRAM streaming-read state (DMA / variable-length reads):
        #   sp_addr     — next SPRAM byte address to request (auto-increments)
        #   outstanding — SPRAM requests issued but whose responses are not yet
        #                 popped (bounds prefetch and is drained at end)
        sp_addr = Signal(13)
        outstanding = Signal(4)
        SP_LIMIT = 4  # max prefetch depth in flight

        # Effective address of the current data byte — a REGISTERED running
        # pointer (set to the base in HEADER1, incremented per byte). Keeping
        # it registered keeps the 13-bit adder off the combinational path that
        # feeds the read-response mux → tx_fifo write data.
        eff_addr = Signal(13)
        rd_sel = eff_addr[0:8]

        # High when eff_addr lies in the register window 0x0000-0x00FF.
        # rd_sel alone only carries the low address byte, so the write decoder
        # needs this to keep SPRAM-region addresses (0x0100-0x1FFF) from
        # aliasing onto registers.
        is_reg_addr = Signal()
        m.d.comb += is_reg_addr.eq(eff_addr[8:] == 0)

        # ── Combinational read-response value (non-SPRAM) ────────────────
        reg_rdval = Signal(8)
        with m.Switch(rd_sel):
            with m.Case(_NCRA):
                m.d.comb += reg_rdval.eq(r_ncra)
            with m.Case(_IMR):
                m.d.comb += reg_rdval.eq(r_imr)
            with m.Case(_IR):
                m.d.comb += reg_rdval.eq(r_ir)
            with m.Case(_RWP_LO):
                m.d.comb += reg_rdval.eq(r_rwp_lo)
            with m.Case(_RRP_LO):
                m.d.comb += reg_rdval.eq(r_rrp_lo)
            with m.Case(_PAR0, _PAR1, _PAR2, _PAR3, _PAR4, _PAR5):
                # PAR addresses are 0x20-0x25, so the low 3 bits index 0..5.
                m.d.comb += reg_rdval.eq(r_par[eff_addr[0:3]])
            with m.Case(_TWD_LO):
                m.d.comb += reg_rdval.eq(r_twd_lo)
            with m.Case(_TWD_HI):
                m.d.comb += reg_rdval.eq(r_twd_hi)
            with m.Case(_NWAYS):
                m.d.comb += reg_rdval.eq(_NWAYS_VAL)
            with m.Case(_HIPR):
                m.d.comb += reg_rdval.eq(_HIPR_VAL)
            with m.Default():
                m.d.comb += reg_rdval.eq(0)

        # Device-ID bytes (addr 0 read while id_phase): 0x04 0x02 0x02 0x00
        devid = Signal(8)
        with m.Switch(byte_ctr):
            with m.Case(0):
                m.d.comb += devid.eq(0x04)
            with m.Case(1):
                m.d.comb += devid.eq(0x02)
            with m.Case(2):
                m.d.comb += devid.eq(0x02)
            with m.Case(3):
                m.d.comb += devid.eq(0x00)

        rd_val = Signal(8)  # response for the current non-SPRAM read byte
        with m.If((addr == 0) & id_phase):
            m.d.comb += rd_val.eq(devid)
        with m.Else():
            m.d.comb += rd_val.eq(reg_rdval)

        # ── Default strobes ──────────────────────────────────────────────
        # Registered strobes default to 0 every cycle; later assignments in
        # the FSM and below override these defaults for a single cycle.
        m.d.exi += [
            spram_req.w_en.eq(0),
            tx_bytes.w_en.eq(0),
            tx_ctrl.w_en.eq(0),
            rx_rptr.w_en.eq(0),
            rx_wptr.r_en.eq(0),
            ncra_rst_ps.i.eq(0),
        ]
        m.d.comb += [
            self.rx_en.eq(0),
            self.tx_en.eq(0),
            self.tx_data.eq(0),
            # Combinational so the FIFO advances in the SAME cycle as the pop —
            # a registered r_en would let `pop` re-fire on the same byte.
            spram_rsp.r_en.eq(0),
        ]

        # ── Transaction FSM (proactive push/pull over byte FIFOs) ────────
        # The SPI bit cadence lives in the capture domain; here we just consume
        # received bytes and, for reads, push response bytes into tx_fifo during
        # the EXI clock-idle gap before the GC clocks the data phase.
        with m.FSM(domain="exi", name="exi_fsm"):

            with m.State("HEADER0"):
                with m.If(self.rx_rdy):
                    m.d.comb += self.rx_en.eq(1)
                    m.d.exi += hdr0.eq(self.rx_data)
                    m.next = "HEADER1"

            with m.State("HEADER1"):
                with m.If(self.rx_rdy):
                    m.d.comb += self.rx_en.eq(1)
                    new_addr = Cat(self.rx_data[2:8], hdr0[0:7])  # 13-bit addr
                    new_len = self.rx_data[0:2]
                    new_write = hdr0[7]
                    m.d.exi += addr.eq(new_addr)
                    m.d.exi += eff_addr.eq(new_addr)  # running pointer init
                    m.d.exi += xfer_len.eq(new_len)
                    m.d.exi += is_write.eq(new_write)
                    m.d.exi += byte_ctr.eq(0)
                    m.d.exi += sp_req.eq(0)
                    m.d.exi += drain_ctr.eq(0)
                    with m.If(new_write):
                        m.next = "WRITE"
                    with m.Elif(new_addr >= 0x100):
                        # SPRAM region: stream until CS deasserts (DMA-capable).
                        m.d.exi += sp_addr.eq(new_addr)
                        m.d.exi += outstanding.eq(0)
                        m.next = "SPRAM_STREAM"
                    with m.Else():
                        m.next = "REG_READ"

            with m.State("WRITE"):
                # Consume xfer_len+1 data bytes, writing the register file.
                with m.If(self.rx_rdy):
                    m.d.comb += self.rx_en.eq(1)
                    # Only decode registers for addresses 0x0000-0x00FF. The
                    # data byte is still consumed for any other address, but
                    # it is dropped instead of landing in whichever register
                    # shares its low address byte.
                    with m.If(is_reg_addr):
                        with m.Switch(rd_sel):
                            with m.Case(_NCRA):
                                m.d.exi += r_ncra.eq(self.rx_data)
                                with m.If(self.rx_data[0]):
                                    m.d.exi += r_ncra[0].eq(0)  # RESET self-clears
                                    m.d.exi += ncra_rst_ps.i.eq(1)
                                    m.d.exi += id_phase.eq(0)
                                with m.If(self.rx_data[1:3].any()):
                                    # Start-transmit: hand the frame length to
                                    # the sync domain (dropped if the FIFO is
                                    # not ready).
                                    with m.If(tx_ctrl.w_rdy):
                                        m.d.exi += tx_ctrl.w_data.eq(tx_frame_len)
                                        m.d.exi += tx_ctrl.w_en.eq(1)
                            with m.Case(_IMR):
                                m.d.exi += r_imr.eq(self.rx_data)
                            with m.Case(_IR):
                                m.d.exi += r_ir.eq(r_ir & ~self.rx_data)  # write-1-clear
                            with m.Case(_RRP_LO):
                                m.d.exi += r_rrp_lo.eq(self.rx_data)
                                with m.If(rx_rptr.w_rdy):
                                    m.d.exi += rx_rptr.w_data.eq(self.rx_data)
                                    m.d.exi += rx_rptr.w_en.eq(1)
                            with m.Case(_PAR0, _PAR1, _PAR2, _PAR3, _PAR4, _PAR5):
                                m.d.exi += r_par[eff_addr[0:3]].eq(self.rx_data)
                            with m.Case(_TWD_LO):
                                m.d.exi += r_twd_lo.eq(self.rx_data)
                                m.d.exi += tx_frame_len[0:8].eq(self.rx_data)
                            with m.Case(_TWD_HI):
                                m.d.exi += r_twd_hi.eq(self.rx_data)
                                m.d.exi += tx_frame_len[8:16].eq(self.rx_data)
                            with m.Case(_TXDATA):
                                with m.If(tx_bytes.w_rdy):
                                    m.d.exi += tx_bytes.w_data.eq(self.rx_data)
                                    m.d.exi += tx_bytes.w_en.eq(1)
                            # All other addresses silently ignored
                    with m.If(byte_ctr == xfer_len):
                        m.next = "HEADER0"
                    with m.Else():
                        m.d.exi += byte_ctr.eq(byte_ctr + 1)
                        m.d.exi += eff_addr.eq(eff_addr + 1)

            with m.State("REG_READ"):
                # Register / device-ID read (addr < 0x100): value available
                # immediately, bounded by the header's xfer_len (≤4 bytes).
                with m.If(self.tx_rdy):
                    m.d.comb += self.tx_data.eq(rd_val)
                    m.d.comb += self.tx_en.eq(1)
                    with m.If(byte_ctr == xfer_len):
                        m.next = "READ_DRAIN"
                    with m.Else():
                        m.d.exi += byte_ctr.eq(byte_ctr + 1)
                        m.d.exi += eff_addr.eq(eff_addr + 1)

            with m.State("READ_DRAIN"):
                # Discard the xfer_len+1 dummy bytes the GC clocks while reading.
                with m.If(self.rx_rdy):
                    m.d.comb += self.rx_en.eq(1)
                    with m.If(drain_ctr == xfer_len):
                        m.next = "HEADER0"
                    with m.Else():
                        m.d.exi += drain_ctr.eq(drain_ctr + 1)

            with m.State("SPRAM_STREAM"):
                # Stream SPRAM bytes until CS deasserts — handles both ≤4-byte
                # immediate reads and arbitrary-length DMA reads uniformly.
                # Issue read requests ahead (prefetch, bounded by SP_LIMIT) and
                # push responses into tx_fifo; the capture domain pops them as
                # the GC clocks. Drain rx dummies as they arrive.
                issue = Signal()
                pop = Signal()
                m.d.comb += issue.eq(self.cs_active & spram_req.w_rdy
                                     & (outstanding < SP_LIMIT))
                m.d.comb += pop.eq(spram_rsp.r_rdy & self.tx_rdy)

                with m.If(issue):
                    m.d.exi += spram_req.w_data.eq(sp_addr)
                    m.d.exi += spram_req.w_en.eq(1)
                    m.d.exi += sp_addr.eq(sp_addr + 1)
                with m.If(pop):
                    m.d.comb += self.tx_data.eq(spram_rsp.r_data)
                    m.d.comb += self.tx_en.eq(1)
                    m.d.comb += spram_rsp.r_en.eq(1)
                # One update covers all four issue/pop combinations.
                m.d.exi += outstanding.eq(outstanding + issue - pop)

                with m.If(self.rx_rdy):
                    m.d.comb += self.rx_en.eq(1)  # drain dummy bytes

                with m.If(~self.cs_active):
                    m.next = "SPRAM_END"

            with m.State("SPRAM_END"):
                # CS deasserted: drain in-flight SPRAM responses and rx dummies,
                # then idle. Leftover prefetch in tx_fifo is flushed by
                # ExiCapture on the next CS assertion.
                with m.If(spram_rsp.r_rdy):
                    m.d.comb += spram_rsp.r_en.eq(1)
                    m.d.exi += outstanding.eq(outstanding - 1)
                with m.If(self.rx_rdy):
                    m.d.comb += self.rx_en.eq(1)
                with m.If((outstanding == 0) & ~self.rx_rdy & ~spram_rsp.r_rdy):
                    m.next = "HEADER0"

        # ── Interrupt output ─────────────────────────────────────────────
        m.d.exi += self.exi_int_n.eq(~(r_ir & r_imr).any())

        # ── Consume RWP updates from sync domain ──────────────────────────
        with m.If(rx_wptr.r_rdy):
            m.d.exi += rx_wptr.r_en.eq(1)
            m.d.exi += r_rwp_lo.eq(rx_wptr.r_data)

        # ── PulseSynchronizer arrivals ────────────────────────────────────
        # These come after the FSM, so they take priority over a register
        # write to IR / NCRA landing in the same cycle.
        with m.If(rx_irq_ps.o):
            m.d.exi += r_ir[1].eq(1)  # RI bit
        with m.If(tx_irq_ps.o):
            m.d.exi += r_ir[2].eq(1)  # TI bit
            m.d.exi += r_ncra[1:3].eq(0)  # clear ST bits

        return m
# ── Testbench ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from amaranth.sim import Simulator, Period
reg = BBARegisterFile()
# Drive the byte-stream interface directly (the SPI bit cadence and FIFOs
# live in ExiCapture; here we model the byte producer/consumer).
async def push_rx(ctx, b):
"""Present one received byte and wait for the register file to pop it."""
ctx.set(reg.rx_data, b)
ctx.set(reg.rx_rdy, 1)
while True:
en = ctx.get(reg.rx_en)
await ctx.tick("exi")
if en:
break
ctx.set(reg.rx_rdy, 0)
async def collect_tx(ctx, n):
"""Collect n response bytes pushed by the register file (bounded)."""
out = []
for _ in range(3000):
if ctx.get(reg.tx_en):
out.append(ctx.get(reg.tx_data))
if len(out) >= n:
break
await ctx.tick("exi")
return out
async def exi_read(ctx, addr, length=1):
hdr0 = (addr >> 6) & 0x7F
hdr1 = ((addr & 0x3F) << 2) | (length - 1)
await push_rx(ctx, hdr0)
await push_rx(ctx, hdr1)
result = await collect_tx(ctx, length) # READ pushes `length` bytes
for _ in range(length): # READ_DRAIN dummies
await push_rx(ctx, 0x00)
return result
async def exi_write(ctx, addr, data):
hdr0 = 0x80 | ((addr >> 6) & 0x7F)
hdr1 = ((addr & 0x3F) << 2) | (len(data) - 1)
await push_rx(ctx, hdr0)
await push_rx(ctx, hdr1)
for b in data:
await push_rx(ctx, b)
# SPRAM contents the streaming-read test reads back (byte i = 0xA0+i).
spram_mem = {0x100 + i: (0xA0 + i) & 0xFF for i in range(64)}
async def spram_model(ctx):
"""Model the SPRAM (sync side): answer spram_req with mem[addr].
One request at a time, with cleanly-pulsed r_en/w_en so the FIFO pop
and the response push stay in lock-step (no double-response races).
"""
state = "POP"
held = 0
async for vals in ctx.tick("sync").sample(
reg.spram_req_r_rdy, reg.spram_req_r_data, reg.spram_rsp_w_rdy):
rdy, addr, rsp_rdy = vals[-3:]
ctx.set(reg.spram_req_r_en, 0)
ctx.set(reg.spram_rsp_w_en, 0)
if state == "POP":
if rdy:
held = spram_mem.get(addr, 0)
ctx.set(reg.spram_req_r_en, 1) # consume the request
state = "RESP"
else: # RESP
if rsp_rdy:
ctx.set(reg.spram_rsp_w_data, held)
ctx.set(reg.spram_rsp_w_en, 1) # deliver the response
state = "POP"
errors = []


async def testbench(ctx):
    """Run register-file checks T1-T7 through the byte-stream ports of `reg`.

    Mismatches are appended to the module-level `errors` list instead of
    being raised, so every test runs and all failures are reported together
    once the simulation has finished.
    """

    def first_hex(values):
        # collect_tx() is bounded, so a read can come back empty if the DUT
        # never responds. The mismatch is already recorded in `errors` by
        # then; indexing values[0] unconditionally would raise IndexError and
        # abort the remaining tests, hiding the real failure report.
        return f"0x{values[0]:02X}" if values else "<no data>"

    ctx.set(reg.tx_rdy, 1)  # tx_fifo always has room in this model
    await ctx.tick("exi").repeat(8)

    # T1: Device ID (addr=0, 4-byte read)
    result = await exi_read(ctx, 0x0000, length=4)
    if result != _DEVICE_ID:
        errors.append(f"T1 device ID: expected {_DEVICE_ID}, got {result}")
    print(f"T1 device ID: {[f'0x{b:02X}' for b in result]}")
    await ctx.tick("exi").repeat(4)

    # T2: Write and read back PAR0-PAR3
    await exi_write(ctx, _PAR0, [0xDE, 0xAD, 0xBE, 0xEF])
    await ctx.tick("exi").repeat(4)
    result = await exi_read(ctx, _PAR0, length=4)
    if result != [0xDE, 0xAD, 0xBE, 0xEF]:
        errors.append(f"T2 PAR readback: {result}")
    print(f"T2 PAR0-3: {[f'0x{b:02X}' for b in result]}")
    await ctx.tick("exi").repeat(4)

    # T3: NWAYS hardcoded 0x17
    result = await exi_read(ctx, _NWAYS, length=1)
    if result != [0x17]:
        errors.append(f"T3 NWAYS: expected 0x17, got {result}")
    print(f"T3 NWAYS: {first_hex(result)}")
    await ctx.tick("exi").repeat(4)

    # T4: HIPR hardcoded 0x01
    result = await exi_read(ctx, _HIPR, length=1)
    if result != [0x01]:
        errors.append(f"T4 HIPR: expected 0x01, got {result}")
    print(f"T4 HIPR: {first_hex(result)}")
    await ctx.tick("exi").repeat(4)

    # T5: IMR write, rx_irq pulse, INT_N asserts, then IR clear
    await exi_write(ctx, _IMR, [0x02])  # enable RI (bit 1)
    await ctx.tick("exi").repeat(4)
    ctx.set(reg.rx_irq_i, 1)
    await ctx.tick("sync").repeat(1)
    ctx.set(reg.rx_irq_i, 0)
    await ctx.tick("exi").repeat(12)  # wait for PS propagation
    int_n = ctx.get(reg.exi_int_n)
    if int_n != 0:
        errors.append(f"T5 INT_N after RI: expected 0, got {int_n}")
    print(f"T5 INT_N after RI pulse: {int_n} (want 0)")
    await exi_write(ctx, _IR, [0x02])  # write-1-to-clear RI
    await ctx.tick("exi").repeat(4)
    int_n = ctx.get(reg.exi_int_n)
    if int_n != 1:
        errors.append(f"T5 INT_N after clear: expected 1, got {int_n}")
    print(f"T5 INT_N after IR clear: {int_n} (want 1)")

    # T6: streaming SPRAM read (DMA) — read N>4 bytes from 0x100 by holding
    # cs_active and clocking past the header's 4-byte length field.
    N = 12
    ctx.set(reg.cs_active, 1)
    await push_rx(ctx, 0x04)  # hdr0 → addr[12:6]; addr 0x100, read
    await push_rx(ctx, 0x00)  # hdr1 → addr[5:0]=0, len field ignored
    got = []
    for _ in range(5000):
        if ctx.get(reg.tx_en):
            got.append(ctx.get(reg.tx_data))
            if len(got) >= N:
                break
        await ctx.tick("exi")
    ctx.set(reg.cs_active, 0)  # end the transaction
    await ctx.tick("exi").repeat(40)  # let SPRAM_END drain/clean up
    want = [spram_mem[0x100 + i] for i in range(N)]
    print(f"T6 DMA read {N}B: {[f'0x{b:02X}' for b in got]}")
    if got != want:
        errors.append(f"T6 streaming SPRAM read: got {got}, want {want}")

    # T7: a normal register read still works after the streaming transaction
    # (FSM cleaned up and returned to HEADER0)
    result = await exi_read(ctx, _NWAYS, length=1)
    if result != [0x17]:
        errors.append(f"T7 NWAYS after DMA: got {result}")
    print(f"T7 NWAYS after DMA read: {first_hex(result)}")
# Both slow domains are clocked at 24 MHz; the testbench drives the byte
# stream while spram_model answers SPRAM requests on the sync side.
sim = Simulator(reg)
slow_period = Period(MHz=24)
sim.add_clock(slow_period, domain="exi")
sim.add_clock(slow_period, domain="sync")
sim.add_testbench(testbench)
sim.add_process(spram_model)
sim.run()

# Report: exit status 1 if any check recorded a failure.
if not errors:
    print("\nAll tests passed.")
else:
    print("\nFAILURES:")
    for failure in errors:
        print(" ", failure)
    sys.exit(1)
+533
View File
@@ -0,0 +1,533 @@
"""BBATop — top-level elaboratable for the GC BBA FPGA replacement.
Clock domains
-------------
capture : 54 MHz, from 12 MHz crystal via SB_PLL40_PAD (DIVR=0 DIVF=71 DIVQ=4)
exi/sync : 24 MHz, from the iCE40UP5K internal SB_HFOSC (÷2, CLKHF_DIV=0b01)
Submodule instantiation and signal wiring
-----------------------------------------
See CLAUDE.md "Module Breakdown" and "CDC Signal Inventory" for the full list.
"""
from amaranth import *
from exi_bba.exi_capture import ExiCapture
from exi_bba.bba_register_file import BBARegisterFile
from exi_bba.spram_arbiter import SPRAMArbiter
from exi_bba.rx_frame_assembler import RXFrameAssembler
from exi_bba.tx_frame_drain import TXFrameDrain
from exi_bba.w5500_spi_master import W5500SPIMaster
from exi_bba.w5100_parallel_master import W5100ParallelMaster
from exi_bba.status_panel import StatusPanel
from amaranth.lib.cdc import FFSynchronizer
__all__ = ["BBATop"]
class BBATop(Elaboratable):
    """Top-level module. Wires all submodules and defines clock domains.

    Parameters
    ----------
    eth          : "w5500" selects the W5500 SPI back-end and creates the
                   w5500_* ports; any other value (default "w5100") selects
                   the W5100 parallel back-end and creates the w5100_* ports.
    reset_cycles : forwarded to the ethernet master as its `reset_cycles`.
    status_panel : when true, a StatusPanel is instantiated and the
                   panel_led / panel_btn ports are created.

    External ports (exposed for platform or testbench connection)
    -------------------------------------------------------------
    EXI / GC interface (SPI Mode 3)
        exi_clk / exi_mosi / exi_cs_n : inputs from GC
        exi_miso : output to GC
        int_n : interrupt output (active low)
    W5500 SPI interface (SPI Mode 0) — only when eth == "w5500"
        w5500_clk / w5500_mosi / w5500_cs_n : outputs to W5500
        w5500_miso : input from W5500
        w5500_int_n : W5500 interrupt (input, active low)
        w5500_rst_n : W5500 hardware reset (output, active low)
    W5100 indirect parallel bus — for any other `eth` value
        w5100_addr : A[1:0] (output)
        w5100_data_o / w5100_data_oe : data out and output enable (outputs)
        w5100_data_i : data in (input)
        w5100_cs_n / w5100_rd_n / w5100_wr_n : bus strobes (outputs)
        w5100_int_n : input to the ethernet master
        w5100_rst_n : output from the ethernet master
    Status panel — only when status_panel is true
        panel_led : 5-bit output driven by StatusPanel.led
        panel_btn : 3-bit input fed to StatusPanel.btn
    """

    def __init__(self, eth="w5100", reset_cycles=24000, status_panel=False):
        # Ethernet back-end: "w5100" (indirect parallel bus, reaches the EXI
        # ceiling) or "w5500" (SPI, ~12 Mbit/s). Both expose the identical
        # tx/rx/init/par interface, so only the physical pins differ.
        # NOTE(review): `eth` is not validated — anything other than the
        # exact string "w5500" falls through to the W5100 branch.
        self._eth = eth
        # MR-reset settle wait passed to the ethernet master (~1 ms on hardware;
        # the testbench overrides with a small value for fast simulation).
        self._reset_cycles = reset_cycles
        # Optional bring-up status panel (drives onboard LEDs/button on the
        # iCEbreaker — see synth.py). panel_led bit order matches StatusPanel.
        self._status_panel = status_panel
        # EXI (GC side)
        self.exi_clk = Signal(init=1)
        self.exi_mosi = Signal()
        self.exi_cs_n = Signal(init=1)
        self.exi_miso = Signal()
        self.int_n = Signal(init=1)
        if eth == "w5500":
            # W5500 SPI
            self.w5500_clk = Signal()
            self.w5500_mosi = Signal()
            self.w5500_miso = Signal()
            self.w5500_cs_n = Signal(init=1)
            self.w5500_int_n = Signal(init=1)
            self.w5500_rst_n = Signal(init=1)
        else:
            # W5100 indirect parallel bus. data_o/data_oe/data_i are the FPGA
            # side of a bidirectional D[7:0] (wrapped in a tristate SB_IO at the
            # platform level); a board ties the upper address lines to 0 so only
            # A[1:0] are wired.
            self.w5100_addr = Signal(2)
            self.w5100_data_o = Signal(8)
            self.w5100_data_oe = Signal()
            self.w5100_data_i = Signal(8)
            self.w5100_cs_n = Signal(init=1)
            self.w5100_rd_n = Signal(init=1)
            self.w5100_wr_n = Signal(init=1)
            self.w5100_int_n = Signal(init=1)
            self.w5100_rst_n = Signal(init=1)
        if status_panel:
            self.panel_led = Signal(5) # to onboard LEDs (see StatusPanel)
            self.panel_btn = Signal(3) # from onboard button(s)

    def elaborate(self, platform):
        """Build the design: clock generation (hardware only), submodule
        instantiation, and all inter-module wiring.

        When `platform` is None no clock domains or clock primitives are
        created here; the caller is expected to supply the capture/exi/sync
        clocks.
        """
        m = Module()
        # ── Clock domain generation ───────────────────────────────────────
        # Three domains, two physical sources (1 PLL + 1 internal HFOSC):
        # capture @ 54 MHz (PLL) — SPI bit engine only; oversamples the
        # 27 MHz EXI clock 2× (robust Mode-3).
        # exi @ 24 MHz (HFOSC) — register file / transaction FSM.
        # sync @ 24 MHz (HFOSC) — SPRAM, RX/TX engines, ethernet master.
        # exi and sync share the HFOSC net (frequency- and phase-matched); the
        # AsyncFIFOs between them are still valid CDC and keep the module
        # boundaries clean. Only the tiny capture front-end needs the fast
        # clock — which is why 27 MHz-EXI / OG performance is reachable on the
        # iCE40UP5K even though the register-file logic tops out ~44 MHz.
        if platform is not None:
            # capture @ 54 MHz: icepll -i 12 -o 54 → DIVR=0 DIVF=71 DIVQ=4.
            # 54 MHz = 2× the 27 MHz EXI clock — the minimum oversampling that
            # cleanly implements SPI Mode 3. The isolated SPI bit engine closes
            # ~91 MHz on this device; the byte-FIFO read path brings the
            # integrated capture domain to ~62 MHz, so 54 closes with margin.
            m.domains += ClockDomain("capture")
            platform.lookup(platform.default_clk).attrs["GLOBAL"] = False
            m.submodules.pll = Instance(
                "SB_PLL40_PAD",
                p_FEEDBACK_PATH = "SIMPLE",
                p_DIVR = 0,
                p_DIVF = 71,
                p_DIVQ = 4,
                p_FILTER_RANGE = 1,
                i_PACKAGEPIN = platform.request("clk12", dir="-").io,
                i_RESETB = Const(1, 1),
                i_BYPASS = Const(0, 1),
                o_PLLOUTGLOBAL = ClockSignal("capture"),
            )
            # exi & sync @ 24 MHz: one SB_HFOSC (÷2) drives both slow domains.
            # The bulky register-file / SPRAM / W5500 logic is routing-bound at
            # ~33–44 MHz on the UP5K; 24 MHz closes with large margin. The byte
            # rate (27 MHz EXI ÷ 8 ≈ 3.4 MHz) leaves ~7 slow cycles per byte.
            m.domains += ClockDomain("exi")
            m.domains += ClockDomain("sync")
            m.submodules.hfosc = Instance(
                "SB_HFOSC",
                p_CLKHF_DIV = "0b01", # 48 ÷ 2 → 24 MHz
                i_CLKHFEN = Const(1, 1),
                i_CLKHFPU = Const(1, 1),
                o_CLKHF = ClockSignal("exi"),
            )
            m.d.comb += ClockSignal("sync").eq(ClockSignal("exi"))
        # (simulation: test harness provides capture/exi/sync clocks via add_clock)
        # ── Submodules ────────────────────────────────────────────────────
        cap = ExiCapture() # SPI bit engine (capture) + byte FIFOs
        reg = BBARegisterFile()
        arb = SPRAMArbiter()
        asm = RXFrameAssembler()
        drain = TXFrameDrain()
        # Back-end chosen to match the ports created in __init__.
        eth = (W5500SPIMaster(reset_cycles=self._reset_cycles)
               if self._eth == "w5500"
               else W5100ParallelMaster(reset_cycles=self._reset_cycles))
        m.submodules.cap = cap
        m.submodules.reg = reg
        m.submodules.arb = arb
        m.submodules.asm = asm
        m.submodules.drain = drain
        m.submodules.eth = eth
        # ── External pin connections ──────────────────────────────────────
        m.d.comb += [
            # EXI inputs (to capture-domain front-end)
            cap.spi_clk .eq(self.exi_clk),
            cap.spi_mosi.eq(self.exi_mosi),
            cap.spi_cs_n.eq(self.exi_cs_n),
            # EXI outputs
            self.exi_miso.eq(cap.spi_miso),
            self.int_n .eq(reg.exi_int_n),
        ]
        # Ethernet back-end physical pins
        if self._eth == "w5500":
            m.d.comb += [
                self.w5500_clk .eq(eth.spi_clk),
                self.w5500_mosi.eq(eth.spi_mosi),
                self.w5500_cs_n.eq(eth.spi_cs_n),
                eth.spi_miso .eq(self.w5500_miso),
                eth.w5500_int_n.eq(self.w5500_int_n),
                self.w5500_rst_n.eq(eth.w5500_rst_n),
            ]
        else:
            m.d.comb += [
                self.w5100_addr .eq(eth.bus_addr),
                self.w5100_data_o .eq(eth.bus_data_o),
                self.w5100_data_oe.eq(eth.bus_data_oe),
                eth.bus_data_i .eq(self.w5100_data_i),
                self.w5100_cs_n .eq(eth.cs_n),
                self.w5100_rd_n .eq(eth.rd_n),
                self.w5100_wr_n .eq(eth.wr_n),
                eth.w5100_int_n .eq(self.w5100_int_n),
                self.w5100_rst_n .eq(eth.w5100_rst_n),
            ]
        # ── ExiCapture byte stream ↔ BBARegisterFile (exi domain) ────────
        m.d.comb += [
            reg.rx_data .eq(cap.rx_data),
            reg.rx_rdy .eq(cap.rx_rdy),
            cap.rx_en .eq(reg.rx_en),
            cap.tx_data .eq(reg.tx_data),
            cap.tx_en .eq(reg.tx_en),
            reg.tx_rdy .eq(cap.tx_rdy),
            reg.cs_active.eq(cap.cs_active), # transaction-active (for DMA reads)
        ]
        # ── BBARegisterFile ↔ SPRAMArbiter (sync domain FIFO sides) ──────
        # SPRAM request: reg exi→sync FIFO read side → arb
        m.d.comb += [
            arb.exi_req_addr .eq(reg.spram_req_r_data),
            arb.exi_req_valid.eq(reg.spram_req_r_rdy),
            reg.spram_req_r_en.eq(arb.exi_req_ready),
        ]
        # SPRAM response: arb result → reg sync→exi FIFO write side
        m.d.comb += [
            reg.spram_rsp_w_data.eq(arb.exi_rsp_data),
            reg.spram_rsp_w_en .eq(arb.exi_rsp_valid),
            # arb does not need w_rdy feedback (spram_rsp FIFO is deeper than latency)
        ]
        # ── BBARegisterFile ↔ TXFrameDrain (sync domain FIFO sides) ──────
        m.d.comb += [
            drain.tx_bytes_r_data.eq(reg.tx_bytes_r_data),
            drain.tx_bytes_r_rdy .eq(reg.tx_bytes_r_rdy),
            reg.tx_bytes_r_en .eq(drain.tx_bytes_r_en),
            drain.tx_ctrl_r_data.eq(reg.tx_ctrl_r_data),
            drain.tx_ctrl_r_rdy .eq(reg.tx_ctrl_r_rdy),
            reg.tx_ctrl_r_en .eq(drain.tx_ctrl_r_en),
        ]
        # ── TXFrameDrain ↔ ethernet master (sync domain) ──────────────────
        m.d.comb += [
            eth.tx_data .eq(drain.tx_data),
            eth.tx_valid.eq(drain.tx_valid),
            drain.tx_ready.eq(eth.tx_ready),
            eth.tx_sof .eq(drain.tx_sof),
            eth.tx_eof .eq(drain.tx_eof),
        ]
        # ── ethernet master → RXFrameAssembler (sync domain) ─────────────
        m.d.comb += [
            asm.rx_data .eq(eth.rx_data),
            asm.rx_valid.eq(eth.rx_valid),
            eth.rx_ready.eq(asm.rx_ready),
            asm.rx_sof .eq(eth.rx_sof),
            asm.rx_eof .eq(eth.rx_eof),
        ]
        # ── RXFrameAssembler → SPRAMArbiter (ETH write, sync domain) ─────
        m.d.comb += [
            arb.eth_wr_addr .eq(asm.eth_wr_addr),
            arb.eth_wr_data .eq(asm.eth_wr_data),
            arb.eth_wr_valid.eq(asm.eth_wr_valid),
            asm.eth_wr_ready.eq(arb.eth_wr_ready),
        ]
        # ── RXFrameAssembler → BBARegisterFile (rx_wptr FIFO write side) ─
        m.d.comb += [
            reg.rx_wptr_w_data.eq(asm.rx_wptr_w_data),
            reg.rx_wptr_w_en .eq(asm.rx_wptr_w_en),
            asm.rx_wptr_w_rdy .eq(reg.rx_wptr_w_rdy),
        ]
        # ── Pulse synchronizer connections ────────────────────────────────
        m.d.comb += [
            # RX irq: sync → exi (RXFrameAssembler → reg → PS → exi domain)
            reg.rx_irq_i.eq(asm.rx_irq),
            # TX irq: sync → exi
            reg.tx_irq_i.eq(drain.tx_irq),
            # MAC address (PAR0–5) → SHAR. exi and sync share the HFOSC net,
            # and par is quasi-static (sampled by the master at init_req).
            eth.par.eq(reg.par),
        ]
        # ── RX enabled gate (NCRA SR / start-receive bit) ─────────────────
        # The RX ring-buffer path is active only after the GC sets NCRA[3].
        m.d.comb += asm.rx_enabled.eq(reg.ncra_sr)
        # ── Optional bring-up status panel (sync domain) ──────────────────
        # init_req = NCRA reset (exi→sync PS), OR'd with the panel's manual
        # re-init button when the panel is present.
        if self._status_panel:
            panel = StatusPanel()
            m.submodules.panel = panel
            # cs_active lives in the exi domain; bring it to sync for the LED.
            cs_a_sync = Signal()
            m.submodules.panel_cs = FFSynchronizer(
                cap.cs_active, cs_a_sync, o_domain="sync")
            # "ready" = ethernet init complete (latched until the next init).
            # init_done takes priority over a simultaneous re-init request.
            ready = Signal()
            with m.If(eth.init_done):
                m.d.sync += ready.eq(1)
            with m.Elif(reg.ncra_rst_o | panel.reinit):
                m.d.sync += ready.eq(0)
            m.d.comb += [
                panel.cs_active.eq(cs_a_sync),
                panel.rx_pulse .eq(asm.rx_irq),
                panel.tx_pulse .eq(drain.tx_irq),
                panel.ready .eq(ready),
                panel.btn .eq(self.panel_btn),
                self.panel_led .eq(panel.led),
                eth.init_req .eq(reg.ncra_rst_o | panel.reinit),
            ]
        else:
            # No panel: the NCRA reset is the only source of init_req.
            m.d.comb += eth.init_req.eq(reg.ncra_rst_o)
        return m
# ── Integration testbench ─────────────────────────────────────────────────
# Drives real EXI Mode-3 transactions on the GC-facing pins and checks the
# response — exercising the full chain ExiCapture (capture domain) ↔ byte FIFOs
# ↔ BBARegisterFile (exi domain) ↔ sync modules, across all three clock domains.
if __name__ == "__main__":
import sys
from amaranth.sim import Simulator, Period
dut = BBATop(eth="w5100", reset_cycles=20, # small reset wait for sim
status_panel=True) # also exercise the panel wiring
errors = []
HALF = 8  # "capture" ticks in each half of an SPI clock period (well-oversampled)


async def spi_byte(ctx, mosi_val):
    """Clock one byte, MSB first, on the GC-facing EXI pins.

    For each bit: MOSI is set, exi_clk is driven low for HALF capture ticks,
    exi_miso is sampled, then exi_clk is driven high for HALF capture ticks.
    Returns the byte assembled from the eight MISO samples.
    """
    received = 0
    for shift in reversed(range(8)):
        ctx.set(dut.exi_mosi, (mosi_val >> shift) & 1)
        ctx.set(dut.exi_clk, 0)                 # falling edge
        await ctx.tick("capture").repeat(HALF)
        sampled = ctx.get(dut.exi_miso)
        received = (received << 1) | sampled
        ctx.set(dut.exi_clk, 1)                 # rising edge
        await ctx.tick("capture").repeat(HALF)
    return received
async def exi_read(ctx, addr, length):
    """EXI immediate read on the pins: header, idle gap, then `length` bytes.

    Returns the list of MISO bytes clocked during the data phase.
    """
    # Only bits [1:0] of the second header byte carry the length, so it is
    # masked: a long (DMA) read must not spill length-1 into addr[5:0]. SPRAM
    # reads ignore the field anyway and stream until CS is released.
    header = (
        (addr >> 6) & 0x7F,
        ((addr & 0x3F) << 2) | ((length - 1) & 0x3),
    )

    # Select the device with the clock idling high.
    ctx.set(dut.exi_cs_n, 0)
    ctx.set(dut.exi_clk, 1)
    await ctx.tick("capture").repeat(HALF)
    for header_byte in header:
        await spi_byte(ctx, header_byte)

    # EXI_Imm clock-idle gap: lets the core decode the header and prefetch
    # response bytes into the tx FIFO before the data phase is clocked.
    await ctx.tick("capture").repeat(HALF * 12)

    data = []
    for _ in range(length):
        data.append(await spi_byte(ctx, 0x00))

    ctx.set(dut.exi_cs_n, 1)
    await ctx.tick("capture").repeat(HALF)
    return data
async def exi_write(ctx, addr, data):
    """EXI immediate write: 2-byte header then the data bytes.

    ctx  : simulator testbench context
    addr : register address; addr[12:6] goes into the first header byte
           (below the write flag, bit 7) and addr[5:0] into the second
    data : sequence of byte values clocked out after the header
    """
    hdr0 = 0x80 | ((addr >> 6) & 0x7F)
    # The length field is only bits [1:0]. Mask it, as exi_read does, so that
    # more than 4 data bytes cannot spill into the addr[5:0] bits and an
    # empty `data` cannot turn the header byte negative.
    hdr1 = ((addr & 0x3F) << 2) | ((len(data) - 1) & 0x3)
    ctx.set(dut.exi_cs_n, 0)
    ctx.set(dut.exi_clk, 1)  # clock idles high before the first bit
    await ctx.tick("capture").repeat(HALF)
    await spi_byte(ctx, hdr0)
    await spi_byte(ctx, hdr1)
    for b in data:
        await spi_byte(ctx, b)
    ctx.set(dut.exi_cs_n, 1)
    await ctx.tick("capture").repeat(HALF)
# ── W5100 indirect-bus slave model: memory image ─────────────────────────
# A known MACRAW packet is placed in the RX buffer up front so the complete
# ethernet→SPRAM→GC path can be checked. The protocol matches the one used by
# the W5100ParallelMaster bench.
RX_FRAME = [0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03, 0x04]

# W5100 addresses / command values used by the model
_W_RX_BASE = 0x6000
_W_S0_CR = 0x0401
_W_S0_RX_RSR = 0x0426
_W_S0_RX_RD = 0x0428
_W_CR_RECV = 0x40

# Values of the 2-bit bus address
_A_MR = 0b00
_A_AR0 = 0b01
_A_AR1 = 0b10
_A_DR = 0b11


def w5100_preload():
    """Return a fresh {address: byte} image holding one received packet.

    The RX buffer gets a 2-byte big-endian length followed by RX_FRAME; the
    length counts those 2 header bytes too. The same length is written to the
    two S0_RX_RSR bytes, and the two S0_RX_RD bytes are zeroed.
    """
    packet_len = len(RX_FRAME) + 2
    len_hi = (packet_len >> 8) & 0xFF
    len_lo = packet_len & 0xFF

    image = {}
    for offset, byte in enumerate([len_hi, len_lo] + RX_FRAME):
        image[_W_RX_BASE + offset] = byte
    image[_W_S0_RX_RSR] = len_hi
    image[_W_S0_RX_RSR + 1] = len_lo
    image[_W_S0_RX_RD] = 0
    image[_W_S0_RX_RD + 1] = 0
    return image


w5100_mem = w5100_preload()
async def w5100_model(ctx):
    """Behavioural W5100 indirect-bus slave, evaluated on every "sync" tick.

    Reads : while /CS and /RD are both low, w5100_data_i is driven with MR,
            the high or low byte of the address register, or
            w5100_mem[address], selected by the 2-bit bus address.
    Writes: latched on the rising edge of /WR while /CS is low. A data-port
            write of the RECV command to S0_CR also clears S0_RX_RSR.
    Auto-increment: when MR bit 1 is set, the address register advances after
            each data-port write and after each data-port read (on the rising
            edge of /RD).
    """
    idm_ar = 0   # 16-bit indirect address (AR0 = high byte, AR1 = low byte)
    mr = 0       # mode register; bit 1 enables address auto-increment
    # Previous strobe levels for rising-edge detection. /CS is only ever used
    # as a level qualifier, so no history is kept for it.
    prev_rd = prev_wr = 1
    async for vals in ctx.tick("sync").sample(
            dut.w5100_cs_n, dut.w5100_rd_n, dut.w5100_wr_n,
            dut.w5100_addr, dut.w5100_data_o):
        cs, rd, wr, a, do = vals[-5:]
        ai = (mr >> 1) & 1
        if cs == 0 and rd == 0:  # drive read data
            if a == _A_MR:
                val = mr
            elif a == _A_AR0:
                val = (idm_ar >> 8) & 0xFF
            elif a == _A_AR1:
                val = idm_ar & 0xFF
            else:
                val = w5100_mem.get(idm_ar, 0)
            ctx.set(dut.w5100_data_i, val)
        if cs == 0 and prev_wr == 0 and wr == 1:  # latch write on /WR rising
            if a == _A_MR:
                mr = do
            elif a == _A_AR0:
                idm_ar = (idm_ar & 0x00FF) | (do << 8)
            elif a == _A_AR1:
                idm_ar = (idm_ar & 0xFF00) | do
            else:
                w5100_mem[idm_ar] = do
                if idm_ar == _W_S0_CR and do == _W_CR_RECV:
                    # RECV consumes the packet: nothing is left to read.
                    w5100_mem[_W_S0_RX_RSR] = 0
                    w5100_mem[_W_S0_RX_RSR + 1] = 0
                if ai:
                    idm_ar = (idm_ar + 1) & 0xFFFF
        # Data-port read finished (/RD rising): advance the address if enabled.
        if cs == 0 and prev_rd == 0 and rd == 1 and a == _A_DR and ai:
            idm_ar = (idm_ar + 1) & 0xFFFF
        prev_rd, prev_wr = rd, wr
async def testbench(ctx):
    """Integration test sequence T1–T7, driven on the GC-facing EXI pins.

    Mismatches are appended to the module-level `errors` list; nothing is
    raised here, so all tests run and are reported together afterwards.
    """
    ctx.set(dut.exi_clk, 1)
    ctx.set(dut.exi_cs_n, 1)
    ctx.set(dut.panel_btn, 0b111) # all buttons released (active-low idle)
    await ctx.tick("capture").repeat(20)
    # T1: device ID — read 4 bytes from addr 0 → 0x04 0x02 0x02 0x00
    dev = await exi_read(ctx, 0x0000, 4)
    print(f"T1 device ID: {[f'0x{b:02X}' for b in dev]}")
    if dev != [0x04, 0x02, 0x02, 0x00]:
        errors.append(f"T1 device ID: got {dev}")
    await ctx.tick("capture").repeat(HALF)
    # T2: write PAR0–3, read them back through the full chain
    await exi_write(ctx, 0x20, [0xDE, 0xAD, 0xBE, 0xEF])
    await ctx.tick("capture").repeat(HALF * 4)
    par = await exi_read(ctx, 0x20, 4)
    print(f"T2 PAR0-3 readback: {[f'0x{b:02X}' for b in par]}")
    if par != [0xDE, 0xAD, 0xBE, 0xEF]:
        errors.append(f"T2 PAR readback: got {par}")
    await ctx.tick("capture").repeat(HALF)
    # T3: NWAYS must read back the hardcoded 0x17 (link-up sentinel)
    nways = await exi_read(ctx, 0x31, 1)
    print(f"T3 NWAYS: 0x{nways[0]:02X} (want 0x17)")
    if nways != [0x17]:
        errors.append(f"T3 NWAYS: got {nways}")
    await ctx.tick("capture").repeat(HALF)
    # T4: DMA-style SPRAM read — clock 8 data bytes (past the 4-byte header
    # limit) within one CS. Exercises the integrated streaming path:
    # ExiCapture(cs_active) → register file SPRAM_STREAM → SPRAMArbiter →
    # real SPRAM → MISO, plus the SPRAM_END cleanup. SPRAM is uninitialised
    # here, so we check the stream completes (8 bytes, no underrun/hang)
    # rather than specific data.
    dma = await exi_read(ctx, 0x0100, 8)
    print(f"T4 DMA read (8B from 0x100): {[f'0x{b:02X}' for b in dma]}")
    if len(dma) != 8:
        errors.append(f"T4 DMA read length: got {len(dma)}")
    await ctx.tick("capture").repeat(HALF)
    # T5: a register read after the streaming read confirms the FSM cleaned
    # up (SPRAM_END → HEADER0) and the device is responsive again.
    nways2 = await exi_read(ctx, 0x31, 1)
    print(f"T5 NWAYS after DMA: 0x{nways2[0]:02X} (want 0x17)")
    if nways2 != [0x17]:
        errors.append(f"T5 NWAYS after DMA read: got {nways2}")
    await ctx.tick("capture").repeat(HALF)
    # ── T6: FULL ETHERNET→SPRAM→GC LOOP ──────────────────────────────
    # A frame arrives from the network (W5100 model) → W5100 master reads it
    # → RXFrameAssembler writes it to the SPRAM ring → GC reads RWP then
    # DMA-reads the descriptor+frame back. Exercises the entire RX path.
    # The W5100 needs its init sequence (which sets MR.AI / opens socket 0)
    # before multi-byte bus accesses work — trigger it via NCRA reset, as
    # the real GC driver does, and let it run before enabling RX.
    await exi_write(ctx, 0x00, [0x01]) # NCRA reset → init_req pulse
    await ctx.tick("capture").repeat(2000) # let W5100 init run
    await exi_write(ctx, 0x00, [0x08]) # NCRA SR bit → enable RX
    await ctx.tick("capture").repeat(HALF * 2)
    ctx.set(dut.w5100_int_n, 0) # W5100: a packet was received
    await ctx.tick("capture").repeat(4000) # let the W5100 RX + SPRAM write run
    ctx.set(dut.w5100_int_n, 1)
    await ctx.tick("capture").repeat(HALF * 2)
    rwp = await exi_read(ctx, 0x16, 1) # RX write pointer (page)
    # Expected SPRAM image: 4 descriptor bytes (two zero bytes, then the
    # big-endian total length) followed by the frame itself.
    total_len = len(RX_FRAME) + 4
    got = await exi_read(ctx, 0x0100, total_len) # descriptor + frame
    want = [0x00, 0x00, (total_len >> 8) & 0xFF, total_len & 0xFF] + RX_FRAME
    print(f"T6 RWP=0x{rwp[0]:02X} (want 0x02)")
    print(f"T6 SPRAM[0x100]: {[f'0x{b:02X}' for b in got]}")
    print(f"T6 expected : {[f'0x{b:02X}' for b in want]}")
    if rwp != [0x02]:
        errors.append(f"T6 RWP: got {rwp}, want [0x02]")
    if got != want:
        errors.append(f"T6 RX frame mismatch:\n got {got}\n want {want}")
    # T7: status-panel integration — after all the EXI traffic above, the
    # EXI-activity LED (panel led[1] = stretched cs_active) must be lit,
    # proving cap.cs_active → FFSync → StatusPanel → LED is wired end-to-end.
    leds = ctx.get(dut.panel_led)
    if not (leds >> 1) & 1:
        errors.append(f"T7 panel: EXI-activity LED not lit (led=0b{leds:05b})")
    print(f"T7 panel led=0b{leds:05b} (bit1=EXI activity, expect 1)")
# Three clocks: 54 MHz for the capture front-end, 24 MHz for both slow domains.
sim = Simulator(dut)
for domain_name, mhz in (("capture", 54), ("exi", 24), ("sync", 24)):
    sim.add_clock(Period(MHz=mhz), domain=domain_name)
sim.add_testbench(testbench)
sim.add_process(w5100_model)
sim.run()

# Report: exit status 1 if any check recorded a failure.
if not errors:
    print("\nAll BBATop integration tests passed.")
else:
    print("\nFAILURES:")
    for failure in errors:
        print(" ", failure)
    sys.exit(1)
+222
View File
@@ -0,0 +1,222 @@
"""EEPROM model — exi domain.
Emulates the MX98730EC's 93C46 serial EEPROM.
93C46 protocol (Microwire, bit-bang)
-------------------------------------
CS=1 activates the device.
Data clocked on rising SK edge, 9-bit header then data:
Bit 0: start (always 1)
Bit 1: opcode MSB } READ = 10
Bit 2: opcode LSB }
Bits 3–8: 6-bit address (MSB first)
After the 9th rising SK the DO line presents the MSB of the 16-bit word.
Each subsequent rising SK advances one bit (MSB→LSB).
Shift register `shift_in` convention
--------------------------------------
`Cat(di_s, shift_in[:-1])` places di_s at bit 0 and shifts existing bits up.
After N edges:
shift_in[N-1] = first bit received (start)
shift_in[0] = last bit received so far
At bit_ctr==8 (after 8 edges, receiving 9th on di_s):
shift_in[7] = start (bit 0)
shift_in[6] = opcode MSB (bit 1)
shift_in[5] = opcode LSB (bit 2)
shift_in[4:0] = addr[5:1] (bits 3–7, MSB first→LSB first in register)
di_s = addr[0] (bit 8)
opcode = Cat(shift_in[5], shift_in[6]) → 0b10 = READ
address = Cat(di_s, shift_in[0:5]) → addr[0..5]
EEPROM content (64 × 16-bit words)
-------------------------------------
Words 0–2 hold the source MAC address (Nintendo OUI 00:09:BF:AA:BB:CC).
The GC BBA driver reads words 0–3 then copies to PAR0–5.
"""
from amaranth import *
from amaranth.lib.cdc import FFSynchronizer
__all__ = ["EEPROMModel"]
# EEPROM image, 64 words of 16 bits. Only the first four words are spelled
# out; every remaining word is zero.
_EEPROM_WORDS = [
    0x0009,  # word 0 → PAR0=0x00, PAR1=0x09
    0xBFAA,  # word 1 → PAR2=0xBF, PAR3=0xAA
    0xBBCC,  # word 2 → PAR4=0xBB, PAR5=0xCC
    0x0000,  # word 3 → checksum placeholder
]
_EEPROM_WORDS.extend([0x0000] * (64 - len(_EEPROM_WORDS)))

_OP_READ = 0b10  # 2-bit opcode that selects READ
class EEPROMModel(Elaboratable):
    """93C46 serial EEPROM model in the exi domain (read-only).

    Only the READ opcode is acted on; any other opcode is received and
    ignored. The word contents come from _EEPROM_WORDS.

    Ports
    -----
    sk / cs / di : bit-bang inputs (raw async; synchronized internally)
    do : serial data output
    """

    def __init__(self):
        self.sk = Signal()
        self.cs = Signal()
        self.di = Signal()
        self.do = Signal()

    def elaborate(self, platform):
        """Build the model: input synchronizers, SK edge detect, header
        shift-in and 16-bit word shift-out."""
        m = Module()
        # One 16-bit signal per EEPROM word, selected by the received address.
        words = Array([Signal(16, init=v, name=f"e{i}") for i, v in enumerate(_EEPROM_WORDS)])
        # ── Input synchronization (async → exi, 2 stages) ────────────────
        sk_s = Signal()
        cs_s = Signal()
        di_s = Signal()
        m.submodules.sync_sk = FFSynchronizer(self.sk, sk_s, o_domain="exi")
        m.submodules.sync_cs = FFSynchronizer(self.cs, cs_s, o_domain="exi")
        m.submodules.sync_di = FFSynchronizer(self.di, di_s, o_domain="exi")
        # Rising-edge detect on the synchronized SK: high now, low one exi
        # cycle ago.
        sk_prev = Signal()
        m.d.exi += sk_prev.eq(sk_s)
        rising_sk = Signal()
        m.d.comb += rising_sk.eq(sk_s & ~sk_prev)
        # ── State ─────────────────────────────────────────────────────────
        shift_in = Signal(9)
        bit_ctr = Signal(4) # 0..8 during header receive
        shift_out = Signal(16) # data word being shifted out MSB-first
        out_ctr = Signal(4) # 0..15, counts bits shifted out
        in_read = Signal() # 1 while outputting a word
        # DO is combinatorial: MSB of shift_out while in read-out phase
        m.d.comb += self.do.eq(Mux(in_read, shift_out[15], 0))
        # CS low resets the counters and aborts any read in progress; it takes
        # priority over an SK edge in the same cycle.
        with m.If(~cs_s):
            m.d.exi += bit_ctr.eq(0)
            m.d.exi += in_read.eq(0)
            m.d.exi += out_ctr.eq(0)
        with m.Elif(rising_sk):
            with m.If(in_read):
                # Shift out next bit (MSB first: left shift, zero into LSB)
                m.d.exi += shift_out.eq(Cat(0, shift_out[:-1]))
                # The 16th edge of the read-out phase ends it.
                with m.If(out_ctr == 15):
                    m.d.exi += in_read.eq(0)
                    m.d.exi += out_ctr.eq(0)
                with m.Else():
                    m.d.exi += out_ctr.eq(out_ctr + 1)
            with m.Else():
                # Shift di_s in at bit 0 (existing bits move up)
                # NOTE(review): the start bit (shift_in[7]) is never checked;
                # framing relies purely on counting SK edges since CS went
                # high, so the first clocked bit must be the start bit.
                m.d.exi += shift_in.eq(Cat(di_s, shift_in[:-1]))
                m.d.exi += bit_ctr.eq(bit_ctr + 1)
                with m.If(bit_ctr == 8):
                    # 9th bit (di_s = addr[0]) arrives.
                    # shift_in[7] = start, [6]=op_MSB, [5]=op_LSB, [4:0]=addr[5:1]
                    op = Cat(shift_in[5], shift_in[6]) # 0b10 for READ
                    adr = Cat(di_s, shift_in[0:5]) # addr[0..5]
                    with m.If(op == _OP_READ):
                        m.d.exi += shift_out.eq(words[adr])
                        m.d.exi += in_read.eq(1)
                        m.d.exi += out_ctr.eq(0)
        return m
# ── Testbench ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
import sys
from amaranth.sim import Simulator, Period
dut = EEPROMModel()
errors = []
HALF = 6  # exi ticks per SK half-period (far longer than the synchronizer latency)


async def eeprom_read(ctx, addr):
    """Perform a 93C46 READ of the 6-bit address `addr`; return the word.

    Nine header bits are clocked in (start, opcode 10, then the address MSB
    first). DO is then sampled once before each of 16 further SK pulses: the
    output bit is valid while SK is low, and each rising edge advances it.
    The samples are assembled MSB first into the returned 16-bit value.
    """

    async def sk_pulse():
        # One full SK period: high for HALF ticks, then low for HALF ticks.
        ctx.set(dut.sk, 1)
        await ctx.tick("exi").repeat(HALF)
        ctx.set(dut.sk, 0)
        await ctx.tick("exi").repeat(HALF)

    # Select the device with SK low.
    ctx.set(dut.cs, 1)
    ctx.set(dut.sk, 0)
    await ctx.tick("exi").repeat(HALF)

    # Header: start(1) + READ opcode(10) + addr[5:0], MSB first.
    header = [1, 1, 0] + [(addr >> pos) & 1 for pos in range(5, -1, -1)]
    for level in header:
        ctx.set(dut.di, level)
        await sk_pulse()  # the DUT takes the bit on the rising edge

    # Data: sample DO in the low phase, then pulse SK to move to the next bit.
    word = 0
    for _ in range(16):
        word = (word << 1) | ctx.get(dut.do)
        await sk_pulse()

    ctx.set(dut.cs, 0)
    await ctx.tick("exi").repeat(HALF)
    return word
async def testbench(ctx):
    """Read EEPROM words 0..3 and compare each against its expected value.

    Every mismatch is appended to the module-level `errors` list.
    """

    def note_mismatch(got, want, message):
        if got != want:
            errors.append(message)

    # Start from an idle bus: all three inputs low.
    await ctx.tick("exi").repeat(4)
    for pin in (dut.cs, dut.sk, dut.di):
        ctx.set(pin, 0)
    await ctx.tick("exi").repeat(4)

    word = await eeprom_read(ctx, 0)
    print(f"T1 word 0 = 0x{word:04X} (expected 0x0009)")
    note_mismatch(word, 0x0009, f"T1: word 0 = 0x{word:04X}, expected 0x0009")

    word = await eeprom_read(ctx, 1)
    print(f"T2 word 1 = 0x{word:04X} (expected 0xBFAA)")
    note_mismatch(word, 0xBFAA, f"T2: word 1 = 0x{word:04X}, expected 0xBFAA")

    word = await eeprom_read(ctx, 2)
    print(f"T3 word 2 = 0x{word:04X} (expected 0xBBCC)")
    note_mismatch(word, 0xBBCC, f"T3: word 2 = 0x{word:04X}, expected 0xBBCC")

    # T4: word 3 reads back as 0x0000
    word = await eeprom_read(ctx, 3)
    print(f"T4 word 3 = 0x{word:04X} (expected 0x0000)")
    note_mismatch(word, 0x0000, f"T4: word 3 = 0x{word:04X}, expected 0x0000")
# Single 24 MHz exi clock; the whole run is dumped to EEPROMModel.vcd.
sim = Simulator(dut)
sim.add_clock(Period(MHz=24), domain="exi")
sim.add_testbench(testbench)
with sim.write_vcd("EEPROMModel.vcd"):
    sim.run()

# Report: exit status 1 if any check recorded a failure.
if not errors:
    print("\nAll tests passed.")
else:
    print("\nFAILURES:")
    for failure in errors:
        print(" ", failure)
    sys.exit(1)
+269
View File
@@ -0,0 +1,269 @@
"""ExiCapture — fast EXI byte-capture front-end (capture domain, 54 MHz).
Wraps the SPIMode3Slave bit engine and bridges it to the slower `exi` domain
(24 MHz) through two AsyncFIFOs:
capture (54 MHz) exi (24 MHz)
┌────────────────────┐ rx_fifo ───► received bytes (header + data)
│ SPIMode3Slave │ (8-bit, capture→exi)
│ (bit engine) │ tx_fifo ◄─── response bytes to drive on MISO
└────────────────────┘ (8-bit, exi→capture)
Why split: the bit engine must oversample a 27 MHz EXI clock 2×, which needs a
54 MHz clock — far faster than the register-file logic can close (~44 MHz).
Only this small, shallow front-end runs fast; everything else stays at 24 MHz.
TX response gating
------------------
Every EXI transaction begins with 2 header bytes (write_flag/addr/len) during
which the GC ignores MISO. The core cannot have produced a response yet (it
hasn't even decoded the header), so the wrapper must NOT pop tx_fifo for those
2 bytes. A per-transaction counter (`txld_cnt`, reset by frame_start) gates the
pop: nothing is popped when the two header bytes complete (MISO is don't-care
there); from the first data byte onward the wrapper pops tx_fifo once per
completed byte (one pop per tx_load). The bit engine's tx_byte is wired
straight to the tx_fifo head, so the first data byte is served from the head
without a pop and each later pop advances the head to the next response byte.
On frame_start, any bytes left in tx_fifo by the previous transaction are
flushed so that stale data never reaches MISO.
"""
from amaranth import *
from amaranth.lib.cdc import FFSynchronizer
from amaranth.lib.fifo import AsyncFIFO
from exi_bba.spi_mode3_slave import SPIMode3Slave
__all__ = ["ExiCapture"]
class ExiCapture(Elaboratable):
    """EXI front-end: SPI bit engine (capture domain) + byte FIFOs to core.

    Parameters
    ----------
    rx_depth : depth of the capture→exi receive FIFO (default 4)
    tx_depth : depth of the exi→capture response FIFO (default 2)

    Physical SPI pins (capture domain)
    ----------------------------------
    spi_clk / spi_mosi / spi_cs_n : raw async inputs from the GC
    spi_miso : output to the GC

    Core-facing RX byte stream (core domain, FWFT read side of rx_fifo)
    ------------------------------------------------------------------
    rx_data : current received byte
    rx_rdy : a received byte is available
    rx_en : pop (assert for one core cycle to consume rx_data)

    Core-facing TX byte stream (core domain, write side of tx_fifo)
    --------------------------------------------------------------
    tx_data : response byte to enqueue
    tx_en : write strobe
    tx_rdy : tx_fifo has room

    Core-facing status
    ------------------
    cs_active : the bit engine's cs_active, synchronized into the exi domain

    "Core domain" above is the `exi` clock domain.
    """

    def __init__(self, rx_depth=4, tx_depth=2):
        self._rx_depth = rx_depth
        self._tx_depth = tx_depth
        # Physical SPI (capture domain, wired to pins by BBATop)
        self.spi_clk = Signal(init=1)
        self.spi_mosi = Signal()
        self.spi_cs_n = Signal(init=1)
        self.spi_miso = Signal()
        # Core-facing RX read side
        self.rx_data = Signal(8)
        self.rx_rdy = Signal()
        self.rx_en = Signal()
        # Core-facing TX write side
        self.tx_data = Signal(8)
        self.tx_en = Signal()
        self.tx_rdy = Signal()
        # Core-facing: high (exi domain) while a transaction is in progress.
        # The register file uses it to stream variable-length (DMA) reads until
        # CS deasserts.
        self.cs_active = Signal()

    def elaborate(self, platform):
        """Instantiate the bit engine and both FIFOs, and wire the TX pop
        gating (header skip + stale-byte flush)."""
        m = Module()
        spi = SPIMode3Slave(domain="capture")
        m.submodules.spi = spi
        # Received bytes cross capture→exi; response bytes cross exi→capture.
        rx_fifo = AsyncFIFO(width=8, depth=self._rx_depth,
                            w_domain="capture", r_domain="exi")
        tx_fifo = AsyncFIFO(width=8, depth=self._tx_depth,
                            w_domain="exi", r_domain="capture")
        m.submodules.rx_fifo = rx_fifo
        m.submodules.tx_fifo = tx_fifo
        # cs_active (capture) → exi domain for the register file
        m.submodules.cs_sync = FFSynchronizer(spi.cs_active, self.cs_active,
                                              o_domain="exi")
        # ── Physical pins ↔ bit engine ───────────────────────────────────
        m.d.comb += [
            spi.spi_clk .eq(self.spi_clk),
            spi.spi_mosi.eq(self.spi_mosi),
            spi.spi_cs_n.eq(self.spi_cs_n),
            self.spi_miso.eq(spi.spi_miso),
        ]
        # ── RX: every received byte → rx_fifo (capture write side) ───────
        # NOTE(review): rx_fifo.w_rdy is not consulted, so a byte arriving
        # while rx_fifo is full is not stored.
        m.d.comb += [
            rx_fifo.w_data.eq(spi.rx_byte),
            rx_fifo.w_en .eq(spi.rx_valid),
        ]
        # Core read side
        m.d.comb += [
            self.rx_data .eq(rx_fifo.r_data),
            self.rx_rdy .eq(rx_fifo.r_rdy),
            rx_fifo.r_en .eq(self.rx_en),
        ]
        # ── TX: core write side ──────────────────────────────────────────
        m.d.comb += [
            tx_fifo.w_data.eq(self.tx_data),
            tx_fifo.w_en .eq(self.tx_en),
            self.tx_rdy .eq(tx_fifo.w_rdy),
        ]
        # ── TX response gating (capture domain) ──────────────────────────
        # The bit engine drives MISO LIVE from tx_byte = tx_fifo head, so the
        # response byte at the head is what gets sent for the current data byte.
        # `txld_cnt` counts completed bytes within the transaction (tx_load
        # pulses at each byte completion):
        # completion 0,1 → header bytes (no pop)
        # completion ≥2 → a data byte finished → pop to advance the head
        # The first data byte (data0) is served live from the head without a
        # pop; the pop after it advances the head to data1's response, etc.
        txld_cnt = Signal(2)
        m.d.comb += spi.tx_byte.eq(tx_fifo.r_data)
        # Pop depends ONLY on the registered tx_load and txld_cnt — NOT on
        # frame_start. (frame_start precedes byte-0's tx_load by a cycle and
        # has already reset txld_cnt to 0, so byte 0 is never a data byte.)
        # Keeping cs_fall/frame_start off the pop path shortens the capture-
        # domain critical path through the FIFO consume pointer.
        #
        # `flushing` clears prefetch over-push left in tx_fifo by the previous
        # transaction: the register file streams response bytes ahead of the GC
        # clock for DMA reads, so when CS deasserts mid-stream a few unsent
        # bytes remain. On CS-fall (frame_start) drain tx_fifo to empty before
        # the new transaction's data phase, so stale bytes never reach MISO.
        flushing = Signal()
        m.d.comb += tx_fifo.r_en.eq(
            (spi.tx_load & (txld_cnt >= 2)) | (flushing & tx_fifo.r_rdy)
        )
        # Flush starts at frame_start and ends once tx_fifo reads as empty.
        with m.If(spi.frame_start):
            m.d.capture += flushing.eq(1)
        with m.Elif(~tx_fifo.r_rdy):
            m.d.capture += flushing.eq(0)
        # Byte-completion counter: cleared at frame_start, saturates at 3 so
        # every completion from the third one onward satisfies txld_cnt >= 2.
        with m.If(spi.frame_start):
            m.d.capture += txld_cnt.eq(0)
        with m.Elif(spi.tx_load & (txld_cnt < 3)):
            m.d.capture += txld_cnt.eq(txld_cnt + 1)
        return m
# ── Testbench ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    dut = ExiCapture()
    errors = []

    # Capture-domain ticks per SPI half-period. The comment in the original
    # design puts the real ratio at ~2 (54 MHz capture / 27 MHz EXI); 4 is
    # used here so the functional check is comfortably oversampled.
    HALF = 4

    async def spi_byte(ctx, mosi_val):
        """Shift one SPI Mode 3 byte out on MOSI, MSB first, and return the
        byte assembled from MISO (read at the end of each clock-low phase)."""
        received = 0
        for shift in reversed(range(8)):
            ctx.set(dut.spi_mosi, (mosi_val >> shift) & 1)
            ctx.set(dut.spi_clk, 0)
            await ctx.tick("capture").repeat(HALF)
            received = (received << 1) | ctx.get(dut.spi_miso)
            ctx.set(dut.spi_clk, 1)
            await ctx.tick("capture").repeat(HALF)
        return received

    async def core_drain_rx(ctx, into):
        """Pop a single byte from the core-side RX port into `into`.

        Returns True when a byte was taken, False when nothing was ready."""
        if not ctx.get(dut.rx_rdy):
            return False
        into.append(ctx.get(dut.rx_data))
        ctx.set(dut.rx_en, 1)
        await ctx.tick("exi").repeat(1)
        ctx.set(dut.rx_en, 0)
        return True

    async def push_tx(ctx, b):
        """Write one response byte into the core-side TX port (one exi tick)."""
        ctx.set(dut.tx_data, b)
        ctx.set(dut.tx_en, 1)
        await ctx.tick("exi").repeat(1)
        ctx.set(dut.tx_en, 0)

    async def drain_gap(ctx, rx_seen):
        """Model an idle stretch: 20 rounds of 'try to pop RX, then one exi tick'."""
        for _ in range(20):
            await core_drain_rx(ctx, rx_seen)
            await ctx.tick("exi").repeat(1)

    async def do_txn(ctx, hdr, responses, n_data, rx_seen):
        """Run one EXI transaction and return the MISO bytes of its data phase.

        Sequence: assert CS, clock the `hdr` bytes, sit in the clock-idle gap
        (draining RX, then prefetching `responses` into the TX FIFO), clock
        `n_data` data bytes with MOSI = 0x00, release CS, and finally drain
        the dummy bytes the data phase produced on the RX side."""
        ctx.set(dut.spi_cs_n, 0)
        ctx.set(dut.spi_clk, 1)
        await ctx.tick("capture").repeat(HALF)

        for header_byte in hdr:
            await spi_byte(ctx, header_byte)

        await drain_gap(ctx, rx_seen)            # clock-idle gap
        for response in responses:
            await push_tx(ctx, response)
        await ctx.tick("capture").repeat(2)

        data_phase = []
        for _ in range(n_data):
            data_phase.append(await spi_byte(ctx, 0x00))

        ctx.set(dut.spi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)
        await drain_gap(ctx, rx_seen)            # drain data-phase dummies
        return data_phase

    async def testbench(ctx):
        rx_seen = []
        await ctx.tick("capture").repeat(2)

        # ── T1: header + 2 data bytes read back ──────────────────────────
        miso = await do_txn(ctx, [0x12, 0x34], [0xA5, 0x5A], 2, rx_seen)
        print(f"T1 rx={[hex(b) for b in rx_seen[:2]]} MISO={[f'0x{b:02X}' for b in miso]}")
        header_ok = rx_seen[:2] == [0x12, 0x34]
        if not header_ok:
            errors.append(f"T1 header rx wrong: {rx_seen[:2]}")
        if not (miso == [0xA5, 0x5A]):
            errors.append(f"T1 MISO wrong: {[hex(b) for b in miso]}")

        # ── T2: prefetch over-push must NOT leak into the next transaction ─
        # Transaction A queues two responses but only one data byte is
        # clocked, so one stale byte stays in the TX FIFO. Transaction B must
        # still read back its own two responses, which shows the flush on
        # CS-fall removed the leftover.
        rx_seen.clear()
        await do_txn(ctx, [0x12, 0x34], [0xA5, 0x5A], 1, rx_seen)  # leaves 0x5A
        misoB = await do_txn(ctx, [0x12, 0x34], [0x11, 0x22], 2, rx_seen)
        print(f"T2 MISO after over-push: {[f'0x{b:02X}' for b in misoB]} (want 0x11 0x22)")
        if misoB != [0x11, 0x22]:
            errors.append(f"T2 flush failed — stale byte leaked: {[hex(b) for b in misoB]}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=54), domain="capture")
    sim.add_clock(Period(MHz=24), domain="exi")
    sim.add_testbench(testbench)
    with sim.write_vcd("ExiCapture.vcd"):
        sim.run()

    if not errors:
        print("\nAll tests passed.")
    else:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
+312
View File
@@ -0,0 +1,312 @@
"""RX frame assembler — sync domain (24 MHz).
Receives raw ethernet frames from W5500SPIMaster and writes them into the SPRAM
ring buffer in MX98730EC format.
Ring buffer layout (SPRAM byte addresses)
------------------------------------------
0x0100–0x0FFF  15 pages × 256 bytes = 3840 bytes
Pages 0x01–0x0F; page 0x00 is reserved.
Page wrap: after 0x0F → 0x01 (skip 0x00).
Frame descriptor (4 bytes at page start)
-----------------------------------------------
Byte 0: LRPS (last received packet status) — 0x00
Byte 1: 0x00
Byte 2: total_length[15:8] (big-endian; includes 4 descriptor bytes)
Byte 3: total_length[7:0]
Bytes 4+: raw ethernet frame
Write sequence
--------------
1. Issue 4 SPRAM writes of 0x00 (placeholder descriptor).
2. For each byte received from W5500, issue one SPRAM write.
3. After EOF: rewrite descriptor bytes 2 and 3 with actual length.
4. Advance RWP, push to rx_wptr FIFO, pulse rx_irq.
"""
from amaranth import *
__all__ = ["RXFrameAssembler"]
_RX_PAGE_FIRST = 0x01
_RX_PAGE_LAST = 0x0F
_PAGES_TOTAL = _RX_PAGE_LAST - _RX_PAGE_FIRST + 1  # 15
# Byte-address bounds of the ring (first byte of the first page, last byte of
# the last page). Used to wrap the running write address.
_RX_RING_FIRST_ADDR = _RX_PAGE_FIRST << 8          # 0x0100
_RX_RING_LAST_ADDR = (_RX_PAGE_LAST << 8) | 0xFF   # 0x0FFF


class RXFrameAssembler(Elaboratable):
    """Writes incoming ethernet frames into the SPRAM ring buffer.

    W5500 streaming interface (sync domain)
    ----------------------------------------
    rx_data / rx_valid / rx_ready : byte stream
    rx_sof / rx_eof               : frame delimiters (same cycle as rx_valid)

    The first byte (rx_sof) is not consumed in IDLE; rx_ready stays low until
    the placeholder descriptor has been issued, so the upstream must hold the
    byte until rx_ready is seen high.

    SPRAM write interface (to SPRAMArbiter, sync domain)
    -----------------------------------------------------
    eth_wr_addr / eth_wr_data / eth_wr_valid / eth_wr_ready

    CDC outputs (wired by BBATop)
    -----------------------------
    rx_wptr_w_data / rx_wptr_w_en / rx_wptr_w_rdy
    rx_irq     : 1-cycle pulse → PulseSynchronizer input
    rx_enabled : controlled by NCRA SR bit (from BBARegisterFile)
    """

    def __init__(self):
        # W5500 stream in
        self.rx_data = Signal(8)
        self.rx_valid = Signal()
        self.rx_ready = Signal()
        self.rx_sof = Signal()
        self.rx_eof = Signal()
        # SPRAM write out
        self.eth_wr_addr = Signal(16)
        self.eth_wr_data = Signal(8)
        self.eth_wr_valid = Signal()
        self.eth_wr_ready = Signal()
        # RWP FIFO write-side (sync→exi)
        self.rx_wptr_w_data = Signal(8)
        self.rx_wptr_w_en = Signal()
        self.rx_wptr_w_rdy = Signal()
        # rx_irq pulse (→ PulseSynchronizer)
        self.rx_irq = Signal()
        # RX gate from NCRA SR bit
        self.rx_enabled = Signal()

    def elaborate(self, platform):
        m = Module()

        # ── Ring-buffer state ─────────────────────────────────────────────
        rwp = Signal(8, init=_RX_PAGE_FIRST)  # current RX write page (1–15)
        # Write address within current frame
        wr_addr = Signal(16)
        # Number of frame data bytes received
        data_ctr = Signal(12)
        # Total length = data_ctr + 4
        total_len = Signal(12)
        # Descriptor base (rwp*256) — saved when frame starts
        desc_base = Signal(16)
        # Placeholder descriptor byte counter (0..3)
        desc_ctr = Signal(2)
        # Number of pages consumed by this frame (rounded up)
        pages_used = Signal(5)

        # Successor of wr_addr inside the ring: the byte after 0x0FFF is
        # 0x0100, matching the page wrap applied to rwp in ADVANCE_RWP.
        # Without this a frame that crosses the end of page 0x0F would be
        # written to 0x1000 and above, outside the ring buffer.
        wr_addr_next = Signal(16)
        m.d.comb += wr_addr_next.eq(
            Mux(wr_addr == _RX_RING_LAST_ADDR, _RX_RING_FIRST_ADDR, wr_addr + 1)
        )

        # Default: no pulses
        m.d.sync += self.rx_irq.eq(0)
        m.d.sync += self.rx_wptr_w_en.eq(0)

        # Combinatorial outputs
        m.d.comb += total_len.eq(data_ctr + 4)

        with m.FSM(domain="sync", name="rx_fsm"):
            with m.State("IDLE"):
                m.d.comb += self.rx_ready.eq(0)
                m.d.sync += self.eth_wr_valid.eq(0)
                with m.If(self.rx_valid & self.rx_sof & self.rx_enabled):
                    # Frame starts at byte 0 of the current write page.
                    frame_base = Signal(16)
                    m.d.comb += frame_base.eq(Cat(Const(0, 8), rwp))
                    m.d.sync += desc_base.eq(frame_base)
                    m.d.sync += wr_addr.eq(frame_base)
                    m.d.sync += data_ctr.eq(0)
                    m.d.sync += desc_ctr.eq(0)
                    m.next = "WRITE_PLACEHOLDER"

            with m.State("WRITE_PLACEHOLDER"):
                # Reserve the 4 descriptor bytes by writing 0x00 and advancing
                # wr_addr by 4. The descriptor sits at a page start, so these
                # four addresses never reach the ring wrap point.
                # NOTE(review): acceptance here is keyed on eth_wr_ready alone
                # and eth_wr_addr lags wr_addr by one cycle, so the last
                # placeholder byte (desc_base + 3) appears not to be presented.
                # That looks harmless because bytes 2 and 3 are rewritten in
                # WRITE_LEN_HI / WRITE_LEN_LO; timing is left unchanged here.
                m.d.sync += self.eth_wr_addr.eq(wr_addr)
                m.d.sync += self.eth_wr_data.eq(0x00)
                m.d.sync += self.eth_wr_valid.eq(1)
                with m.If(self.eth_wr_ready):
                    m.d.sync += wr_addr.eq(wr_addr_next)
                    with m.If(desc_ctr == 3):
                        m.d.sync += self.eth_wr_valid.eq(0)
                        m.next = "RECV_AND_WRITE"
                    with m.Else():
                        m.d.sync += desc_ctr.eq(desc_ctr + 1)

            with m.State("RECV_AND_WRITE"):
                # Accept a byte from the W5500 whenever the single outgoing
                # write slot is empty or is being taken this cycle.
                m.d.comb += self.rx_ready.eq(~self.eth_wr_valid | self.eth_wr_ready)
                with m.If(self.rx_valid & (~self.eth_wr_valid | self.eth_wr_ready)):
                    m.d.sync += self.eth_wr_addr.eq(wr_addr)
                    m.d.sync += self.eth_wr_data.eq(self.rx_data)
                    m.d.sync += self.eth_wr_valid.eq(1)
                    m.d.sync += wr_addr.eq(wr_addr_next)  # wraps 0x0FFF → 0x0100
                    m.d.sync += data_ctr.eq(data_ctr + 1)
                    with m.If(self.rx_eof):
                        m.next = "WAIT_LAST_WRITE"
                with m.Elif(self.eth_wr_valid & self.eth_wr_ready):
                    # No new byte this cycle and the pending write was taken.
                    m.d.sync += self.eth_wr_valid.eq(0)

            with m.State("WAIT_LAST_WRITE"):
                # Wait for the last data byte write to be accepted
                with m.If(~self.eth_wr_valid | self.eth_wr_ready):
                    m.d.sync += self.eth_wr_valid.eq(0)
                    # Compute pages used: ceil((data_ctr + 4) / 256)
                    # = (total_len + 255) >> 8 = total_len[11:8] + (total_len[7:0] != 0)
                    m.d.sync += pages_used.eq(total_len[8:12] + (total_len[:8] != 0))
                    m.next = "WRITE_LEN_HI"

            with m.State("WRITE_LEN_HI"):
                # Overwrite descriptor byte 2 with the high byte of total_len.
                m.d.sync += self.eth_wr_addr.eq(desc_base + 2)
                m.d.sync += self.eth_wr_data.eq(total_len[8:12])
                m.d.sync += self.eth_wr_valid.eq(1)
                # A transfer only happens when our registered valid is already
                # high AND the sink is ready. Checking ready alone would skip
                # the write entirely with a sink that is ready before valid.
                with m.If(self.eth_wr_valid & self.eth_wr_ready):
                    m.d.sync += self.eth_wr_valid.eq(0)
                    m.next = "WRITE_LEN_LO"

            with m.State("WRITE_LEN_LO"):
                # Overwrite descriptor byte 3 with total_len[7:0]
                m.d.sync += self.eth_wr_addr.eq(desc_base + 3)
                m.d.sync += self.eth_wr_data.eq(total_len[:8])
                m.d.sync += self.eth_wr_valid.eq(1)
                with m.If(self.eth_wr_valid & self.eth_wr_ready):
                    m.d.sync += self.eth_wr_valid.eq(0)
                    m.next = "ADVANCE_RWP"

            with m.State("ADVANCE_RWP"):
                # next_rwp = ((rwp - 1 + pages_used) % 15) + 1
                next_rwp_raw = Signal(8)
                m.d.comb += next_rwp_raw.eq(rwp + pages_used)
                with m.If(next_rwp_raw > _RX_PAGE_LAST):
                    m.d.sync += rwp.eq(next_rwp_raw - _PAGES_TOTAL)
                with m.Else():
                    m.d.sync += rwp.eq(next_rwp_raw)
                m.next = "PUSH_WPT"

            with m.State("PUSH_WPT"):
                # Publish the new write page and raise the interrupt pulse
                # once the write-pointer FIFO can take it.
                with m.If(self.rx_wptr_w_rdy):
                    m.d.sync += self.rx_wptr_w_data.eq(rwp)
                    m.d.sync += self.rx_wptr_w_en.eq(1)
                    m.d.sync += self.rx_irq.eq(1)
                    m.next = "IDLE"

        return m
# ── Testbench ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    dut = RXFrameAssembler()
    errors = []

    # Give up on a byte if rx_ready stays low this many sync ticks; keeps a
    # broken DUT from hanging the simulation.
    STALL_LIMIT = 64
    # Ticks to poll for the 1-cycle rx_irq pulse after a frame is sent.
    IRQ_POLL_TICKS = 30

    async def send_frame(ctx, label, frame):
        """Drive `frame` into the W5500 stream port with a valid/ready handshake.

        Each byte is held (with rx_sof on the first and rx_eof on the last)
        until a clock edge at which rx_ready was high. Free-running the bytes
        one per tick would silently drop everything sent while the DUT is
        still writing the placeholder descriptor.
        """
        last = len(frame) - 1
        ctx.set(dut.rx_valid, 1)
        for i, b in enumerate(frame):
            ctx.set(dut.rx_data, b)
            ctx.set(dut.rx_sof, 1 if i == 0 else 0)
            ctx.set(dut.rx_eof, 1 if i == last else 0)
            for _ in range(STALL_LIMIT):
                # Sample rx_ready before the edge: that is the value the DUT
                # acts on at the edge.
                accepted = ctx.get(dut.rx_ready)
                await ctx.tick("sync").repeat(1)
                if accepted:
                    break
            else:
                errors.append(f"{label}: byte {i} never accepted (rx_ready stuck low)")
                break
        ctx.set(dut.rx_valid, 0)
        ctx.set(dut.rx_sof, 0)
        ctx.set(dut.rx_eof, 0)

    async def wait_irq(ctx):
        """Poll for the rx_irq pulse; return (seen, rx_wptr_w_data at the pulse)."""
        for _ in range(IRQ_POLL_TICKS):
            await ctx.tick("sync").repeat(1)
            if ctx.get(dut.rx_irq):
                return True, ctx.get(dut.rx_wptr_w_data)
        return False, 0

    async def testbench(ctx):
        # Setup: acknowledge all SPRAM writes immediately
        ctx.set(dut.eth_wr_ready, 1)
        ctx.set(dut.rx_wptr_w_rdy, 1)
        ctx.set(dut.rx_enabled, 1)
        await ctx.tick("sync").repeat(2)

        # ── T1: 10-byte frame → pages_used=1, rwp advances 1→2 ──────────────
        frame = [0xAA, 0xBB, 0xCC, 0xDD, 0x08, 0x00, 0x45, 0x00, 0x00, 0x01]
        await send_frame(ctx, "T1", frame)
        t1_irq_seen, t1_wptr_d = await wait_irq(ctx)
        print(f"T1 rx_irq_seen={t1_irq_seen} wptr_data=0x{t1_wptr_d:02X}")
        if not t1_irq_seen:
            errors.append("T1: rx_irq never pulsed")
        if t1_wptr_d != 2:
            errors.append(f"T1: rwp should be 2 (page 1→2), got {t1_wptr_d}")
        await ctx.tick("sync").repeat(4)

        # ── T2: Send a second frame; verify rwp advances further ────────────
        frame2 = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66]
        await send_frame(ctx, "T2", frame2)
        t2_irq_seen, t2_wptr_d = await wait_irq(ctx)
        print(f"T2 rx_irq_seen={t2_irq_seen} wptr_data=0x{t2_wptr_d:02X}")
        if not t2_irq_seen:
            errors.append("T2: rx_irq never pulsed after second frame")
        if t2_wptr_d != 3:
            errors.append(f"T2: rwp should be 3 (page 2→3), got {t2_wptr_d}")

        # ── T3: RX disabled — SOF must be ignored ──────────────────────────
        ctx.set(dut.rx_enabled, 0)
        ctx.set(dut.rx_data, 0xDE)
        ctx.set(dut.rx_valid, 1)
        ctx.set(dut.rx_sof, 1)
        await ctx.tick("sync").repeat(4)
        ctx.set(dut.rx_valid, 0)
        ctx.set(dut.rx_sof, 0)
        # No SPRAM write should have been issued
        wr_valid = ctx.get(dut.eth_wr_valid)
        if wr_valid:
            errors.append("T3: SPRAM write issued while rx_enabled=0")
        print(f"T3 rx disabled: eth_wr_valid={wr_valid} (expected 0)")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    with sim.write_vcd("RXFrameAssembler.vcd"):
        sim.run()

    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
    else:
        print("\nAll tests passed.")
+274
View File
@@ -0,0 +1,274 @@
"""SPI Mode 3 byte-oriented slave for the EXI bus.
CPOL=1, CPHA=1: CLK idles HIGH.
Slave samples MOSI on the FALLING CLK edge.
Slave drives MISO on the RISING CLK edge (master samples on next falling edge).
All three raw inputs are run through a 2-stage FFSynchronizer before use.
"""
from amaranth import *
from amaranth.lib.cdc import FFSynchronizer
# ── public re-export for import convenience ─────────────────────────────────
__all__ = ["SPIMode3Slave"]
class SPIMode3Slave(Elaboratable):
    """Byte-oriented SPI Mode 3 slave.

    All registered logic runs in the clock domain named by `domain`
    (default "capture").

    Ports
    -----
    spi_clk / spi_mosi / spi_cs_n : raw async inputs from GC (synchronized internally)
    spi_miso    : output to GC; idles HIGH when CS deasserted
    rx_byte     : last complete received byte (valid when rx_valid pulses)
    rx_valid    : 1-cycle pulse (in `domain`) when rx_byte contains a new byte
    tx_byte     : byte to send next. Its MSB is driven onto MISO combinationally
                  while the engine is armed (between bytes); the whole byte is
                  latched on the first falling clock edge of each byte.
    tx_load     : 1-cycle pulse after each completed byte, requesting the next
                  TX byte from upstream
    frame_start : 1-cycle pulse when CS assertion is detected
    cs_active   : level, high while the synchronized CS is asserted
    """

    def __init__(self, domain="capture"):
        # Clock domain this byte engine runs in. Split-domain design puts the
        # bit engine in a fast `capture` domain (54 MHz) so it can oversample
        # a 27 MHz EXI clock 2×; the register file lives in a slower domain.
        self._domain = domain
        self.spi_clk = Signal(init=1)   # idles HIGH
        self.spi_mosi = Signal()
        self.spi_cs_n = Signal(init=1)  # active LOW
        self.spi_miso = Signal()        # combinatorial output
        self.rx_byte = Signal(8)
        self.rx_valid = Signal()
        self.tx_byte = Signal(8)
        self.tx_load = Signal()
        # 1-cycle pulse on CS assertion (transaction start). The capture
        # wrapper uses it to reset its per-transaction TX byte counter.
        self.frame_start = Signal()
        # Level: high while CS is asserted (a transaction is in progress).
        # Lets downstream logic detect variable-length (DMA) transaction ends.
        self.cs_active = Signal()

    def elaborate(self, platform):
        m = Module()
        d = self._domain

        # ── Input synchronization (async → `d`, 2 stages) ──────────────────
        clk_s = Signal(init=1)
        mosi_s = Signal()
        cs_s = Signal(init=1)
        m.submodules.sync_clk = FFSynchronizer(self.spi_clk, clk_s, o_domain=d, init=1)
        m.submodules.sync_mosi = FFSynchronizer(self.spi_mosi, mosi_s, o_domain=d)
        m.submodules.sync_cs = FFSynchronizer(self.spi_cs_n, cs_s, o_domain=d, init=1)

        # ── Edge detection ──────────────────────────────────────────────────
        clk_prev = Signal(init=1)
        cs_prev = Signal(init=1)
        m.d[d] += clk_prev.eq(clk_s)
        m.d[d] += cs_prev.eq(cs_s)

        falling_clk = Signal()
        rising_clk = Signal()
        cs_fall = Signal()
        m.d.comb += falling_clk.eq(~clk_s & clk_prev)
        m.d.comb += rising_clk.eq(clk_s & ~clk_prev)
        m.d.comb += cs_fall.eq(~cs_s & cs_prev)
        m.d.comb += self.frame_start.eq(cs_fall)
        m.d.comb += self.cs_active.eq(~cs_s)

        # ── Shift registers ─────────────────────────────────────────────────
        rx_shift = Signal(8)
        tx_shift = Signal(8)
        bit_ctr = Signal(4)       # counts 0..7; 7 means "8th (last) bit"
        armed = Signal(init=1)    # between bytes: drive the LIVE tx_byte MSB
        rearm = Signal()          # arm for next byte on the next rising edge

        # MISO: idle HIGH when CS deasserted. While "armed" — i.e. at the start
        # of a byte, including the inter-byte / clock-idle gap before the first
        # falling edge — drive the LIVE tx_byte MSB. This is what lets a
        # response that upstream pushes DURING the EXI clock-idle gap reach MISO
        # in time: there is no clock edge during the gap to latch it, so MISO
        # must be combinational on tx_byte until the byte actually starts. Once
        # shifting (after the first falling edge) drive the latched shift reg.
        m.d.comb += self.spi_miso.eq(
            Mux(cs_s, 1, Mux(armed, self.tx_byte[7], tx_shift[7]))
        )

        # Default: deassert single-cycle pulses every cycle
        m.d[d] += self.rx_valid.eq(0)
        m.d[d] += self.tx_load.eq(0)

        with m.If(cs_fall | cs_s):
            # CS just asserted, or CS is idle: reset the per-byte state so the
            # first byte of a transaction drives its MSB live (armed).
            # `rearm` is cleared as well. It is set on the 8th falling edge
            # and normally consumed by the following rising edge, but if CS is
            # seen high in the same sample as that rising edge the bit engine
            # below does not run and the flag would survive into the next
            # transaction, re-arming on its first rising edge and corrupting
            # the second bit on MISO.
            m.d[d] += bit_ctr.eq(0)
            m.d[d] += armed.eq(1)
            m.d[d] += rearm.eq(0)
        with m.Else():
            # CS asserted: run bit engine
            with m.If(falling_clk):
                # Sample MOSI (MSB first: left-shift, new bit enters at LSB)
                # Cat(a, b) → a at lower bits; so Cat(mosi, rx[6:0]) = {rx[6:0], mosi}
                m.d[d] += rx_shift.eq(Cat(mosi_s, rx_shift[:-1]))
                with m.If(armed):
                    # First falling edge of this byte: master has just sampled
                    # the MSB (driven live above). Latch tx_byte so the
                    # remaining 7 bits shift out of a stable register.
                    m.d[d] += tx_shift.eq(self.tx_byte)
                    m.d[d] += armed.eq(0)
                with m.If(bit_ctr == 7):
                    # 8th falling edge: byte complete. The master samples the
                    # LSB on THIS edge, so MISO must still hold tx_shift[7].
                    # Defer arming to the next rising edge (rearm) so MISO is
                    # not switched to the next byte's live MSB too early.
                    m.d[d] += self.rx_byte.eq(Cat(mosi_s, rx_shift[:-1]))
                    m.d[d] += self.rx_valid.eq(1)
                    m.d[d] += bit_ctr.eq(0)
                    m.d[d] += self.tx_load.eq(1)  # advance source to next byte
                    m.d[d] += rearm.eq(1)         # arm on the next rising edge
                with m.Else():
                    m.d[d] += bit_ctr.eq(bit_ctr + 1)
            with m.If(rising_clk):
                with m.If(rearm):
                    # Byte boundary: arm for the next byte (live MSB drive).
                    m.d[d] += armed.eq(1)
                    m.d[d] += rearm.eq(0)
                with m.Elif(~armed):
                    # Shift left: next bit into MSB position
                    # Cat(0, tx[6:0]) = {tx[6:0], 0} — left shift
                    m.d[d] += tx_shift.eq(Cat(0, tx_shift[:-1]))

        return m
# ── Testbench ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    from amaranth.sim import Simulator, Period

    dut = SPIMode3Slave()

    # Capture-domain ticks per SPI half-period; comfortably more than the
    # 3-cycle (2 sync + 1 edge) latency of the DUT's input path.
    HALF = 4

    async def spi_send_byte(ctx, mosi_val, next_tx_byte=None):
        """Clock one SPI Mode 3 byte, MSB first; return the byte read on MISO.

        next_tx_byte: when given, it is written to tx_byte after the low
        phase of the last bit and before the final rising edge, so it is in
        place when the DUT re-arms on that rising edge and starts driving the
        next byte's MSB live.
        """
        collected = 0
        for pos in reversed(range(8)):
            ctx.set(dut.spi_mosi, (mosi_val >> pos) & 1)
            ctx.set(dut.spi_clk, 0)   # falling edge
            await ctx.tick("capture").repeat(HALF)
            collected = (collected << 1) | ctx.get(dut.spi_miso)
            # Swap in the next response while the clock is still low on the
            # last bit; the DUT only notices the rising edge a few capture
            # cycles after spi_clk is raised.
            if next_tx_byte is not None and pos == 0:
                ctx.set(dut.tx_byte, next_tx_byte)
            ctx.set(dut.spi_clk, 1)   # rising edge
            await ctx.tick("capture").repeat(HALF)
        return collected

    errors = []

    def check_hex(label, got, want):
        """Record a failure when a received byte differs from the expected one."""
        if got != want:
            errors.append(f"{label}: expected 0x{want:02X}, got 0x{got:02X}")

    async def open_txn(ctx, first_tx):
        """Assert CS with the first response byte preloaded, then let it settle."""
        ctx.set(dut.spi_cs_n, 0)
        ctx.set(dut.tx_byte, first_tx)
        await ctx.tick("capture").repeat(HALF)

    async def close_txn(ctx):
        """Let the last byte land, read rx_byte, release CS; return rx_byte."""
        await ctx.tick("capture").repeat(2)
        latest = ctx.get(dut.rx_byte)
        ctx.set(dut.spi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)
        return latest

    async def testbench(ctx):
        # ── Test 1: Single byte TX/RX ──────────────────────────────────────
        ctx.set(dut.spi_clk, 1)
        await open_txn(ctx, 0xA5)        # preloaded before CS fall is detected
        miso = await spi_send_byte(ctx, 0x37)
        rx = await close_txn(ctx)
        check_hex("Test1 rx_byte", rx, 0x37)
        check_hex("Test1 miso", miso, 0xA5)
        print(f"Test1 MOSI→rx_byte: 0x{rx:02X} MISO←tx_byte: 0x{miso:02X}")
        await ctx.tick("capture").repeat(HALF)

        # ── Test 2: Two-byte transaction; second response swapped in live ──
        await open_txn(ctx, 0xBE)        # first response byte
        # 0xEF is placed on tx_byte during the last bit of byte 0 so the DUT
        # picks it up as byte 1's response.
        miso0 = await spi_send_byte(ctx, 0x00, next_tx_byte=0xEF)
        miso1 = await spi_send_byte(ctx, 0xFF)
        rx1 = await close_txn(ctx)
        check_hex("Test2 miso0", miso0, 0xBE)
        check_hex("Test2 miso1", miso1, 0xEF)
        check_hex("Test2 rx1", rx1, 0xFF)
        print(f"Test2 byte0 MISO: 0x{miso0:02X} byte1 MISO: 0x{miso1:02X} rx1: 0x{rx1:02X}")
        await ctx.tick("capture").repeat(HALF)

        # ── Test 3: MISO idles HIGH when CS deasserted ─────────────────────
        miso_idle = ctx.get(dut.spi_miso)
        if miso_idle != 1:
            errors.append(f"Test3 MISO idle: expected 1, got {miso_idle}")
        print(f"Test3 MISO idle (CS=1): {miso_idle}")

        # ── Test 4: All-zeros byte (0x00) TX and RX ────────────────────────
        await open_txn(ctx, 0x00)
        miso = await spi_send_byte(ctx, 0xFF)
        rx = await close_txn(ctx)
        check_hex("Test4 miso", miso, 0x00)
        check_hex("Test4 rx", rx, 0xFF)
        print(f"Test4 0x00 TX / 0xFF RX: MISO=0x{miso:02X} rx=0x{rx:02X}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=54), domain="capture")
    sim.add_testbench(testbench)
    with sim.write_vcd("SPIMode3Slave.vcd"):
        sim.run()

    if not errors:
        print("\nAll tests passed.")
    else:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        raise SystemExit(1)
+276
View File
@@ -0,0 +1,276 @@
"""SPRAM arbiter — sync domain (24 MHz).
Owns one iCE40UP5K SPRAM block (SB_SPRAM256KA, 16-bit wide) and arbitrates
between two clients:
Client A (EXI read) : prefetch pipeline; low priority.
Client B (ETH write): RXFrameAssembler; high priority.
ETH writes win when both clients are active. This is safe because the GC only
reads pages that the ETH engine has already finished writing (ring-buffer
invariant).
SPRAM addressing
-----------------
SB_SPRAM256KA is 16 K × 16-bit (256 Kbit = 32 KB per block). Byte addressing:
ADDRESS = byte_addr >> 1
MASKWREN[3:0]:
0b0011 → write lower byte (byte_addr even)
0b1100 → write upper byte (byte_addr odd)
Read: both bytes returned; pick the right one from DATAOUT based on addr bit 0.
Read latency: 1 synchronous cycle — result of cycle N is valid at N+1.
In simulation (platform is None) a behavioural `Memory` model is used instead of
the SB_SPRAM256KA Instance so tests run without IceStorm.
"""
from amaranth import *
from amaranth.lib.memory import Memory
__all__ = ["SPRAMArbiter"]
_SPRAM_WORDS = 65536 # depth of the simulation Memory: 64 K 16-bit words = 128 KB
class SPRAMArbiter(Elaboratable):
    """Arbitrated SPRAM controller in the sync domain.

    Two clients share one memory port. An ETH write always wins over an EXI
    read in the same cycle; a blocked read simply sees exi_req_ready low and
    is retried by leaving exi_req_valid asserted.

    EXI read interface (from BBARegisterFile spram_req / spram_rsp FIFOs)
    ----------------------------------------------------------------------
    exi_req_addr  : 16-bit byte address to read
    exi_req_valid : FIFO r_rdy — a request is waiting
    exi_req_ready : FIFO r_en — pop the request (asserted when serviced)
    exi_rsp_data  : 8-bit result byte
    exi_rsp_valid : FIFO w_en — push result when valid (1-cycle pulse, two
                    clock edges after the request is accepted; there is no
                    back-pressure input for the response)

    ETH write interface (from RXFrameAssembler)
    -------------------------------------------
    eth_wr_addr  : 16-bit byte address to write
    eth_wr_data  : 8-bit byte value
    eth_wr_valid : write request present
    eth_wr_ready : write accepted this cycle (combinational; equals
                   eth_wr_valid, since writes are never stalled)
    """
    def __init__(self):
        # EXI read interface
        self.exi_req_addr = Signal(16)
        self.exi_req_valid = Signal()
        self.exi_req_ready = Signal()
        self.exi_rsp_data = Signal(8)
        self.exi_rsp_valid = Signal()
        # ETH write interface
        self.eth_wr_addr = Signal(16)
        self.eth_wr_data = Signal(8)
        self.eth_wr_valid = Signal()
        self.eth_wr_ready = Signal()
    def elaborate(self, platform):
        m = Module()
        # ── SPRAM instantiation (hardware vs simulation) ──────────────────
        # The word address is 14 bits wide. It is assigned from byte_addr[1:]
        # (15 bits), so the top byte-address bit is dropped by the assignment.
        # NOTE(review): byte addresses >= 0x8000 therefore appear to alias
        # onto the lower 32 KB — confirm no client uses them.
        spram_addr = Signal(14) # word address (byte_addr >> 1)
        spram_din = Signal(16)
        spram_dout = Signal(16)
        spram_wren = Signal()
        spram_mask = Signal(4) # MASKWREN
        if platform is None:
            # Behavioural model: synchronous read with 1-cycle latency.
            # Memory is a Component; read/write ports are obtained from it
            # and wired via its submodule ports (not added as separate submodules).
            mem = Memory(shape=16, depth=_SPRAM_WORDS, init=[])
            m.submodules.mem = mem
            mem_rd = mem.read_port(domain="sync", transparent_for=[])
            mem_wr = mem.write_port(domain="sync", granularity=8)
            # The 4-bit MASKWREN is folded into two byte enables:
            # en[0] = lower byte enable, en[1] = upper byte enable
            byte0_en = Signal()
            byte1_en = Signal()
            m.d.comb += [
                byte0_en .eq(spram_wren & (spram_mask[0] | spram_mask[1])),
                byte1_en .eq(spram_wren & (spram_mask[2] | spram_mask[3])),
                mem_rd.addr .eq(spram_addr),
                mem_rd.en .eq(1),
                spram_dout .eq(mem_rd.data),
                mem_wr.addr .eq(spram_addr),
                mem_wr.data .eq(spram_din),
                mem_wr.en .eq(Cat(byte0_en, byte1_en)),
            ]
        else:
            # Hardware: a single SB_SPRAM256KA instance, always selected and
            # never put in standby or sleep. POWEROFF is tied to 1.
            # NOTE(review): POWEROFF is understood to be active-low on this
            # primitive (1 = powered) — confirm against the Lattice SPRAM guide.
            m.submodules.spram = Instance(
                "SB_SPRAM256KA",
                i_ADDRESS = spram_addr,
                i_DATAIN = spram_din,
                i_MASKWREN = spram_mask,
                i_WREN = spram_wren,
                i_CHIPSELECT = Const(1, 1),
                i_CLOCK = ClockSignal("sync"),
                i_STANDBY = Const(0, 1),
                i_SLEEP = Const(0, 1),
                i_POWEROFF = Const(1, 1),
                o_DATAOUT = spram_dout,
            )
        # ── Arbiter pipeline ─────────────────────────────────────────────
        # Stage 1: issue SPRAM address and control signals (combinatorial)
        # Stage 2: capture SPRAM output into rsp_buf (synchronous, 1-cycle)
        read_pending = Signal() # a read address was issued last cycle
        read_was_odd = Signal() # byte address bit 0 of the pending read
        rsp_buf = Signal(8) # registered response byte; valid when exi_rsp_valid
        # Combinatorial defaults; the branches below override them (later
        # assignments take precedence).
        m.d.comb += [
            spram_wren .eq(0),
            spram_mask .eq(0),
            spram_din .eq(0),
            spram_addr .eq(0),
            self.exi_req_ready.eq(0),
            self.eth_wr_ready .eq(0),
            self.exi_rsp_data .eq(rsp_buf), # always sourced from registered buffer
        ]
        # Registered defaults: both flags are 1-cycle pulses unless set again.
        m.d.sync += [
            self.exi_rsp_valid.eq(0),
            read_pending .eq(0),
        ]
        # ETH write has priority
        with m.If(self.eth_wr_valid):
            m.d.comb += [
                spram_addr .eq(self.eth_wr_addr[1:]),
                spram_wren .eq(1),
                self.eth_wr_ready.eq(1),
            ]
            # Address bit 0 selects which half of the 16-bit word is written.
            with m.If(self.eth_wr_addr[0]):
                m.d.comb += [
                    spram_din [8:16].eq(self.eth_wr_data),
                    spram_mask .eq(0b1100),
                ]
            with m.Else():
                m.d.comb += [
                    spram_din [0:8].eq(self.eth_wr_data),
                    spram_mask .eq(0b0011),
                ]
        # EXI read (lower priority)
        with m.Elif(self.exi_req_valid):
            m.d.comb += [
                spram_addr .eq(self.exi_req_addr[1:]),
                self.exi_req_ready.eq(1),
            ]
            # Remember that a read is in flight and which byte lane it wants.
            m.d.sync += [
                read_pending.eq(1),
                read_was_odd.eq(self.exi_req_addr[0]),
            ]
        # Capture SPRAM output into registered buffer after 1-cycle latency
        with m.If(read_pending):
            with m.If(read_was_odd):
                m.d.sync += rsp_buf.eq(spram_dout[8:16])
            with m.Else():
                m.d.sync += rsp_buf.eq(spram_dout[0:8])
            m.d.sync += self.exi_rsp_valid.eq(1)
        return m
# ── Testbench ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    dut = SPRAMArbiter()
    errors = []

    async def eth_write(ctx, addr, data):
        """Present one ETH write for a single clock, then idle for one clock.

        Returns eth_wr_ready as observed after the first clock, while the
        request is still asserted."""
        ctx.set(dut.eth_wr_addr, addr)
        ctx.set(dut.eth_wr_data, data)
        ctx.set(dut.eth_wr_valid, 1)
        await ctx.tick("sync").repeat(1)
        ready = ctx.get(dut.eth_wr_ready)
        ctx.set(dut.eth_wr_valid, 0)
        await ctx.tick("sync").repeat(1)
        return ready

    async def exi_read(ctx, addr):
        """Issue one EXI read and return (exi_rsp_data, exi_rsp_valid).

        Clock A issues the read (read_pending goes high); clock B captures the
        SPRAM output. exi_rsp_valid is high for exactly the cycle sampled here."""
        ctx.set(dut.exi_req_addr, addr)
        ctx.set(dut.exi_req_valid, 1)
        await ctx.tick("sync").repeat(1)   # clock A
        ctx.set(dut.exi_req_valid, 0)
        await ctx.tick("sync").repeat(1)   # clock B
        return ctx.get(dut.exi_rsp_data), ctx.get(dut.exi_rsp_valid)

    async def testbench(ctx):
        await ctx.tick("sync").repeat(2)

        # T1: ETH write to even byte address 0x0100, then EXI read it back
        accepted = await eth_write(ctx, 0x0100, 0xAB)
        if not accepted:
            errors.append("T1 eth write not accepted")
        rdata, rvalid = await exi_read(ctx, 0x0100)
        if rdata != 0xAB:
            errors.append(f"T1 read back: expected 0xAB, got 0x{rdata:02X}")
        if not rvalid:
            errors.append("T1 exi_rsp_valid not set")
        print(f"T1 even addr read-back: data=0x{rdata:02X} valid={rvalid}")
        await ctx.tick("sync").repeat(2)

        # T2: ETH write to ODD byte address 0x0101, read back
        await eth_write(ctx, 0x0101, 0xCD)
        rdata, _ = await exi_read(ctx, 0x0101)
        if rdata != 0xCD:
            errors.append(f"T2 odd addr read-back: expected 0xCD, got 0x{rdata:02X}")
        print(f"T2 odd addr read-back: data=0x{rdata:02X}")
        await ctx.tick("sync").repeat(2)

        # T3: ETH write wins when both clients are active in the same cycle
        # (write 0xEE to 0x0200 while an EXI read of 0x0100 is also pending).
        ctx.set(dut.eth_wr_addr, 0x0200)
        ctx.set(dut.eth_wr_data, 0xEE)
        ctx.set(dut.eth_wr_valid, 1)
        ctx.set(dut.exi_req_addr, 0x0100)
        ctx.set(dut.exi_req_valid, 1)
        await ctx.tick("sync").repeat(1)
        eth_won = ctx.get(dut.eth_wr_ready)
        exi_blocked = not ctx.get(dut.exi_req_ready)
        ctx.set(dut.eth_wr_valid, 0)
        ctx.set(dut.exi_req_valid, 0)
        if not eth_won:
            errors.append("T3 ETH priority: ETH write not accepted")
        if not exi_blocked:
            errors.append("T3 ETH priority: EXI read was not blocked")
        print(f"T3 ETH priority: eth_won={eth_won} exi_blocked={exi_blocked}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    with sim.write_vcd("SPRAMArbiter.vcd"):
        sim.run()

    if not errors:
        print("\nAll tests passed.")
    else:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
+227
View File
@@ -0,0 +1,227 @@
"""StatusPanel — 5-LED / 3-button bring-up panel (sync domain).
A development/diagnostics front panel for the iCEbreaker LED+button PMOD. It
turns the device's internal liveness signals into something you can watch on a
real GameCube during bring-up, and gives three buttons for manual control.
LEDs (logical, active-high; set `led_active_low=True` if the board sinks current)
led[0] heartbeat — ~1–2 Hz blink: clock alive, bitstream loaded
led[1] exi_active — stretched `cs_active`: the GC is talking on EXI
led[2] rx_act — stretched `rx_pulse`: a packet arrived from the net
led[3] tx_act — stretched `tx_pulse`: a packet went out
led[4] ready — `ready` level (e.g. ethernet init complete)
Buttons (raw pin level; `btn_active_low=True` for the usual pull-up wiring)
btn[0] eth_rst — while held, drive `eth_rst_n` low (reset the ethernet chip)
btn[1] reinit — on press, emit a one-cycle `reinit` pulse (force re-init)
btn[2] freeze — toggle: latch the rx/tx activity LEDs so a single one-shot
blink sticks until you unfreeze (catch a lone packet)
Single-cycle events (`rx_pulse`/`tx_pulse`) are stretched to ~`stretch_cycles`
so the eye can see them; `cs_active` is a level that is re-triggered while high.
Buttons are debounced (`debounce_cycles` stable samples) — same idea as
`rebbarb/debouncer.py`, inlined here to keep this module self-contained.
"""
from amaranth import *
__all__ = ["StatusPanel"]
class StatusPanel(Elaboratable):
    """Five status LEDs and three debounced buttons, all in the sync domain.

    Constructor parameters
    ----------------------
    hb_bit          : counter bit used as the heartbeat LED
    stretch_cycles  : how many cycles an activity LED stays lit after an event
    debounce_cycles : consecutive differing samples needed to accept a new
                      button level
    led_active_low  : invert all five LED outputs
    btn_active_low  : invert all three raw button inputs before debouncing
    """
    def __init__(self, hb_bit=23, stretch_cycles=1_440_000,
                 debounce_cycles=240_000, led_active_low=False,
                 btn_active_low=True):
        # hb_bit: heartbeat = bit `hb_bit` of a free-running counter. Bit 23
        # has a period of 2**24 cycles, i.e. 24 MHz / 2**24 ≈ 1.4 Hz.
        # stretch_cycles ≈ 60 ms and debounce_cycles ≈ 10 ms at 24 MHz.
        self._hb_bit = hb_bit
        self._stretch = stretch_cycles
        self._deb = debounce_cycles
        self._led_inv = led_active_low
        self._btn_inv = btn_active_low
        # Status inputs (sync domain)
        self.cs_active = Signal() # level: EXI transaction in progress
        self.rx_pulse = Signal() # 1-cycle: frame received
        self.tx_pulse = Signal() # 1-cycle: frame sent
        self.ready = Signal() # level: ethernet ready
        # Raw button inputs (from pins)
        self.btn = Signal(3)
        # Outputs
        self.led = Signal(5)
        self.eth_rst_n = Signal(init=1) # btn0 held → 0
        self.reinit = Signal() # btn1 press → 1-cycle pulse
    def elaborate(self, platform):
        m = Module()
        # ── Heartbeat ────────────────────────────────────────────────────
        # Free-running counter just wide enough to contain bit `hb_bit`.
        hb = Signal(self._hb_bit + 1)
        m.d.sync += hb.eq(hb + 1)
        heartbeat = hb[self._hb_bit]
        # ── Button conditioning (normalise polarity → debounce) ──────────
        # braw is 1 while a button is pressed, whatever the pin polarity.
        braw = Signal(3)
        m.d.comb += braw.eq(self.btn ^ C(0b111 if self._btn_inv else 0, 3))
        bdeb = Signal(3)
        for i in range(3):
            cnt = Signal(range(self._deb + 1), name=f"deb_cnt{i}")
            with m.If(braw[i] == bdeb[i]):
                m.d.sync += cnt.eq(0) # input agrees with debounced value: restart
            with m.Else():
                m.d.sync += cnt.eq(cnt + 1) # input differs: count consecutive samples
                # After `debounce_cycles` differing samples in a row, accept
                # the new level (this assignment overrides the increment).
                with m.If(cnt == self._deb - 1):
                    m.d.sync += [bdeb[i].eq(braw[i]), cnt.eq(0)]
        # btn0: hold → ethernet reset asserted (active-low output)
        m.d.comb += self.eth_rst_n.eq(~bdeb[0])
        # btn1: rising edge → reinit pulse
        b1_prev = Signal()
        m.d.sync += b1_prev.eq(bdeb[1])
        m.d.comb += self.reinit.eq(bdeb[1] & ~b1_prev)
        # btn2: rising edge toggles freeze
        b2_prev = Signal()
        freeze = Signal()
        m.d.sync += b2_prev.eq(bdeb[2])
        with m.If(bdeb[2] & ~b2_prev):
            m.d.sync += freeze.eq(~freeze)
        # ── Activity stretchers (rx/tx), sticky while frozen ─────────────
        def stretch(pulse, name):
            # Builds one stretcher and returns the expression that drives its
            # LED: lit while the down-counter is non-zero or the sticky latch
            # is set.
            cnt = Signal(range(self._stretch + 1), name=f"{name}_cnt")
            sticky = Signal(name=f"{name}_sticky")
            with m.If(pulse):
                m.d.sync += cnt.eq(self._stretch) # (re)start the visible window
                with m.If(freeze):
                    m.d.sync += sticky.eq(1) # latch a one-shot when frozen
            with m.Elif(cnt != 0):
                m.d.sync += cnt.eq(cnt - 1)
            with m.If(~freeze):
                m.d.sync += sticky.eq(0) # clear sticky when unfrozen
            return (cnt != 0) | sticky
        rx_led = stretch(self.rx_pulse, "rx")
        tx_led = stretch(self.tx_pulse, "tx")
        # ── cs_active: level → stretched so brief transactions are visible ─
        # The counter is reloaded on every cycle cs_active is high, so the LED
        # stays lit for `stretch_cycles` after the level drops.
        cs_cnt = Signal(range(self._stretch + 1))
        with m.If(self.cs_active):
            m.d.sync += cs_cnt.eq(self._stretch)
        with m.Elif(cs_cnt != 0):
            m.d.sync += cs_cnt.eq(cs_cnt - 1)
        cs_led = cs_cnt != 0
        # LED order: [0] heartbeat, [1] EXI active, [2] rx, [3] tx, [4] ready.
        leds = Cat(heartbeat, cs_led, rx_led, tx_led, self.ready)
        m.d.comb += self.led.eq(leds ^ C(0b11111 if self._led_inv else 0, 5))
        return m
# ── Testbench ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    # Scaled-down timing so each timed behaviour fits in a short simulation.
    dut = StatusPanel(hb_bit=3, stretch_cycles=8, debounce_cycles=3)
    errors = []

    async def settle(ctx, n=1):
        """Advance the sync domain by `n` clock ticks."""
        await ctx.tick("sync").repeat(n)

    def led_bit(ctx, index):
        """Current value (0/1) of one bit of the LED output."""
        return (ctx.get(dut.led) >> index) & 1

    async def tap_freeze(ctx):
        """Press btn2 long enough to pass the debounce, then release it."""
        ctx.set(dut.btn, 0b011)
        await settle(ctx, 6)
        ctx.set(dut.btn, 0b111)
        await settle(ctx, 2)

    async def testbench(ctx):
        # Buttons are active-low: all ones means nothing is pressed.
        ctx.set(dut.btn, 0b111)
        await settle(ctx, 4)

        # T1: the heartbeat (counter bit 3) must flip across 8 cycles.
        before = led_bit(ctx, 0)
        await settle(ctx, 8)
        after = led_bit(ctx, 0)
        if before == after:
            errors.append("T1 heartbeat did not toggle over 8 cycles")
        print(f"T1 heartbeat toggled: {before} -> {after}")

        # T2: a one-cycle rx pulse lights led[2]; it goes dark once the
        # stretch interval has run out.
        ctx.set(dut.rx_pulse, 1)
        await settle(ctx, 1)
        ctx.set(dut.rx_pulse, 0)
        await settle(ctx, 1)
        lit = led_bit(ctx, 2)
        if not lit:
            errors.append("T2 rx LED not lit after pulse")
        await settle(ctx, 12)   # longer than stretch_cycles
        still_lit = led_bit(ctx, 2)
        if still_lit:
            errors.append("T2 rx LED did not clear after stretch")
        print(f"T2 rx LED: on={lit} then off={not still_lit}")

        # T3: led[4] mirrors the ready level.
        ctx.set(dut.ready, 1)
        await settle(ctx, 1)
        if not led_bit(ctx, 4):
            errors.append("T3 ready LED not lit")
        ctx.set(dut.ready, 0)
        print("T3 ready LED follows level")

        # T4: holding btn0 (drive 0) pulls eth_rst_n low after the debounce;
        # releasing it lets eth_rst_n return high.
        ctx.set(dut.btn, 0b110)
        await settle(ctx, 6)
        if ctx.get(dut.eth_rst_n) != 0:
            errors.append("T4 eth_rst_n not asserted while btn0 held")
        ctx.set(dut.btn, 0b111)
        await settle(ctx, 6)
        if ctx.get(dut.eth_rst_n) != 1:
            errors.append("T4 eth_rst_n not released")
        print("T4 btn0 → eth_rst_n hold/release ok")

        # T5: one press of btn1 must yield exactly one reinit pulse.
        ctx.set(dut.btn, 0b101)
        pulses = 0
        for _cycle in range(10):
            await settle(ctx, 1)
            pulses += ctx.get(dut.reinit) & 1
        ctx.set(dut.btn, 0b111)
        await settle(ctx, 6)
        if pulses != 1:
            errors.append(f"T5 reinit pulses: got {pulses}, want 1")
        print(f"T5 btn1 → reinit pulses={pulses}")

        # T6: with freeze on (btn2), a single rx pulse keeps led[2] lit well
        # past the stretch; switching freeze off clears it.
        await tap_freeze(ctx)               # freeze on
        ctx.set(dut.rx_pulse, 1)
        await settle(ctx, 1)
        ctx.set(dut.rx_pulse, 0)
        await settle(ctx, 20)
        stuck = led_bit(ctx, 2)
        if not stuck:
            errors.append("T6 frozen rx LED did not stick")
        await tap_freeze(ctx)               # freeze off
        cleared = led_bit(ctx, 2) == 0
        if not cleared:
            errors.append("T6 rx LED did not clear after unfreeze")
        print(f"T6 freeze: stuck={stuck} cleared_after_unfreeze={cleared}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    sim.run()

    if not errors:
        print("\nAll tests passed.")
    else:
        print("\nFAILURES:")
        for failure in errors:
            print(" ", failure)
        sys.exit(1)
+197
View File
@@ -0,0 +1,197 @@
"""Synthesis script for BBATop → iCEbreaker (iCE40UP5K SG48).
Run from workspace root:
python -m exi_bba.synth # synthesize only
python -m exi_bba.synth --flash # synthesize and flash
This file re-declares IceBreakerPlatform inline so that importing
rebbarb/rebbarb.py (which has a module-level platform.build() call) is avoided.
"""
import os
import subprocess
import sys
from amaranth import *
from amaranth.build import *
from amaranth.vendor import LatticeICE40Platform
from exi_bba.bba_top import BBATop
# ── Platform definition ───────────────────────────────────────────────────
# Pin assignments use the iCEbreaker PMOD connectors as placeholders.
# Replace with actual SP1-interposer pin numbers once PCB is finalised.
#
# PMOD1A (J2): pins 4 2 47 45 / 3 48 46 44 (top/bottom)
# PMOD1B (J3): pins 43 38 34 31 / 42 36 32 28
# PMOD2 (J4): pins 27 25 21 19 / 26 23 20 18
#
# EXI : CLK=4 MOSI=2 MISO=47 CS_N=45 INT_N=3 (PMOD1A)
# W5100 : indirect parallel bus — 15 pins across PMOD1B + PMOD2.
# ADDR[0..1]=43 38 DATA[0..7]=34 31 42 36 32 28 27 25 (listed LSB first, matching the Pins() order below)
# CS_N=21 RD_N=19 WR_N=26 INT_N=23 RST_N=20 (pin 18 free)
# Board: tie the W5100's upper address lines A[14:2] to 0 (only A[1:0] wired);
# DATA[7:0] is bidirectional (SB_IO tristate, single shared output-enable).
class IceBreakerPlatform(LatticeICE40Platform):
    """Board definition used to build BBATop for the iCEbreaker.

    Declares the 12 MHz clock input, the EXI and W5100 pin groups and the
    onboard LEDs/button as platform resources, and programs the board by
    running ``iceprog`` (overridable through the ``ICEPROG`` environment
    variable) on the generated ``.bin``.
    """

    device = "iCE40UP5K"
    package = "SG48"
    default_clk = "clk12"

    resources = [
        Resource("clk12", 0,
                 Pins("35", dir="i"),
                 Clock(12e6),
                 Attrs(GLOBAL=True, IO_STANDARD="SB_LVCMOS")),

        # EXI interface (GC side, SPI Mode 3) — PMOD1A FPGA pins
        Resource("exi", 0,
                 *(Subsignal(sig, Pins(pin, dir=direction))
                   for sig, pin, direction in (
                       ("clk",   "4",  "i"),
                       ("mosi",  "2",  "i"),
                       ("miso",  "47", "o"),
                       ("cs_n",  "45", "i"),
                       ("int_n", "3",  "o"),
                   )),
                 Attrs(IO_STANDARD="SB_LVCMOS")),

        # W5100 indirect parallel bus — PMOD1B + PMOD2 FPGA pins
        Resource("w5100", 0,
                 *(Subsignal(sig, Pins(pins, dir=direction))
                   for sig, pins, direction in (
                       ("addr",  "43 38",                   "o"),
                       ("data",  "34 31 42 36 32 28 27 25", "io"),
                       ("cs_n",  "21",                      "o"),
                       ("rd_n",  "19",                      "o"),
                       ("wr_n",  "26",                      "o"),
                       ("int_n", "23",                      "i"),
                       ("rst_n", "20",                      "o"),
                   )),
                 Attrs(IO_STANDARD="SB_LVCMOS")),

        # Bring-up status panel → iCEbreaker ONBOARD parts (dedicated pins,
        # not on any PMOD, so they coexist with EXI + W5100).  LEDR/LEDG are
        # active-low discrete LEDs; BTN_N is the user button.
        # (The onboard RGB LED on pins 39/40/41 needs an SB_RGBA_DRV instance
        # wired to raw pads — board/version-specific — left as a future
        # add-on to expose rx/tx/ready as colours; the 2 discrete LEDs cover
        # bring-up.)
        *(Resource(res, 0, Pins(pin, dir=direction),
                   Attrs(IO_STANDARD="SB_LVCMOS"))
          for res, pin, direction in (
              ("ledr", "11", "o"),
              ("ledg", "37", "o"),
              ("btn",  "10", "i"),
          )),
    ]
    connectors = []

    def toolchain_program(self, products, name):
        """Flash the built bitstream ``<name>.bin`` with iceprog.

        The programmer executable is taken from ``$ICEPROG`` when set,
        otherwise ``iceprog`` is looked up on PATH.  A non-zero exit status
        raises ``subprocess.CalledProcessError``.
        """
        programmer = os.environ.get("ICEPROG", "iceprog")
        with products.extract(f"{name}.bin") as bin_path:
            subprocess.check_call([programmer, bin_path])
# ── BBATop with platform resource wiring ─────────────────────────────────
class BBATopSynth(BBATop):
    """BBATop whose ports are tied to board pins when a platform is given.

    With ``platform=None`` the module is returned exactly as
    ``BBATop.elaborate`` built it, with no pin requests made.
    """

    def elaborate(self, platform):
        m = super().elaborate(platform)
        if platform is None:
            return m

        exi_pins = platform.request("exi", 0)
        w5100_pins = platform.request("w5100", 0)

        # Pins → design (inputs).
        m.d.comb += [
            self.exi_clk.eq(exi_pins.clk.i),
            self.exi_mosi.eq(exi_pins.mosi.i),
            self.exi_cs_n.eq(exi_pins.cs_n.i),
            self.w5100_data_i.eq(w5100_pins.data.i),
            self.w5100_int_n.eq(w5100_pins.int_n.i),
        ]

        # Design → pins (outputs).  DATA[7:0] is bidirectional: the value
        # and its output-enable are driven separately.
        m.d.comb += [
            exi_pins.miso.o.eq(self.exi_miso),
            exi_pins.int_n.o.eq(self.int_n),
            w5100_pins.addr.o.eq(self.w5100_addr),
            w5100_pins.data.o.eq(self.w5100_data_o),
            w5100_pins.data.oe.eq(self.w5100_data_oe),
            w5100_pins.cs_n.o.eq(self.w5100_cs_n),
            w5100_pins.rd_n.o.eq(self.w5100_rd_n),
            w5100_pins.wr_n.o.eq(self.w5100_wr_n),
            w5100_pins.rst_n.o.eq(self.w5100_rst_n),
        ]

        # ── Bring-up status panel → onboard LEDs / button ────────────────
        # Two discrete LEDs answer the #1 bring-up question on a real GC:
        #   LEDG = heartbeat (clock alive)   LEDR = EXI activity (GC talking)
        # The one onboard button → panel btn[1] (manual re-init).
        if self._status_panel:
            red = platform.request("ledr", 0)
            green = platform.request("ledg", 0)
            button = platform.request("btn", 0)
            panel = self.panel_led
            released = C(1, 1)      # active-low idle level for unused buttons
            m.d.comb += green.o.eq(~panel[0])   # heartbeat (active-low LED)
            m.d.comb += red.o.eq(~panel[1])     # EXI activity (active-low LED)
            # btn[0] and btn[2] have no physical button: hold them released.
            m.d.comb += self.panel_btn.eq(Cat(released, button.i, released))

        return m
# ── Entry point ───────────────────────────────────────────────────────────
#
# Seed sweep: nextpnr placement is stochastic. With ~22% LC utilisation
# routing dominates timing, so different seeds can vary fmax by ±20%.
# Pass --seeds N to try N seeds (default 1, i.e. seed 1 only).
# The build directory is reused across seeds, so after the sweep
# build/top.bin holds the result of the LAST seed tried.  With --flash the
# best seed is rebuilt before programming, so the flashed bitstream is the
# best one.
if __name__ == "__main__":
    import glob
    import re

    do_flash = "--flash" in sys.argv
    try:
        n_seeds = next((int(sys.argv[i + 1]) for i, a in enumerate(sys.argv)
                        if a == "--seeds"), 1)
    except (IndexError, ValueError):
        # "--seeds" given last, or followed by something that is not a number.
        sys.exit("error: --seeds requires an integer argument (e.g. --seeds 4)")

    # The last matching line of a log wins (nextpnr may report more than once).
    fmax_re = re.compile(r"Max frequency.*exi.*?:\s*([\d.]+)\s*MHz")

    platform = IceBreakerPlatform()
    print(f"Synthesizing BBATop for {platform.device}-{platform.package} "
          f"(do_program={do_flash}, seeds=1..{n_seeds})")

    best_seed = 1
    best_fmax = 0.0
    for seed in range(1, n_seeds + 1):
        print(f"\n{'='*60}")
        print(f" Seed {seed}/{n_seeds}")
        print(f"{'='*60}")

        # Drop timing logs left by an earlier seed or an earlier run.  If
        # this build dies before nextpnr writes a fresh log, a stale file
        # would otherwise be parsed below and its fmax credited to this seed.
        for stale in glob.glob("build/*.tim"):
            try:
                os.remove(stale)
            except OSError as exc:
                print(f" [seed {seed}] could not remove stale {stale}: {exc}")

        opts = f"--opt-timing --seed {seed} --timing-allow-fail"
        try:
            platform.build(BBATopSynth(status_panel=True), do_program=False,
                           verbose=True, nextpnr_opts=opts)
        except Exception as exc:
            # nextpnr exits non-zero even with --timing-allow-fail on some
            # versions; treat as non-fatal timing failure.
            print(f" [seed {seed}] build exception (timing?): {exc}")

        # Parse fmax from the nextpnr log(s) written by this build, if any.
        fmax_exi = 0.0
        for tim_path in glob.glob("build/*.tim"):
            try:
                with open(tim_path) as log:
                    for line in log:
                        hit = fmax_re.search(line)
                        if hit:
                            fmax_exi = float(hit.group(1))
            except OSError:
                # Unreadable log: leave fmax at whatever was found so far.
                pass
        print(f" [seed {seed}] exi fmax extracted: {fmax_exi:.1f} MHz")

        if fmax_exi > best_fmax:
            best_fmax = fmax_exi
            best_seed = seed

    print(f"\nBest seed: {best_seed} exi fmax: {best_fmax:.1f} MHz")

    if do_flash:
        # Rebuild with the winning seed so the programmed bitstream is the
        # best one rather than whatever the last sweep iteration left behind.
        print(f"\nFlashing with seed {best_seed}...")
        opts = f"--opt-timing --seed {best_seed} --timing-allow-fail"
        platform.build(BBATopSynth(status_panel=True), do_program=True,
                       verbose=True, nextpnr_opts=opts)
    print("Done.")
+253
View File
@@ -0,0 +1,253 @@
"""TX frame drain — sync domain (24 MHz).
Drains the tx_bytes AsyncFIFO (written by BBARegisterFile in the exi domain),
forwards each byte to W5500SPIMaster with SOF/EOF framing, then pulses tx_irq
to notify the GC that the transmit is complete.
Flow
----
1. Wait for the tx_ctrl FIFO to have a length word (signals a complete frame queued).
2. Pop the length from the tx_ctrl FIFO.
3. Assert tx_sof on first byte, tx_eof on last byte, consuming tx_bytes FIFO.
4. When W5500SPIMaster accepts the final byte: pulse tx_irq.
The tx_bytes AsyncFIFO (exi→sync, 8-bit, depth=16) and tx_ctrl FIFO (exi→sync,
16-bit, depth=4) are instantiated in BBARegisterFile and their sync-domain read
sides are exposed as ports wired here by BBATop.
"""
from amaranth import *
__all__ = ["TXFrameDrain"]
class TXFrameDrain(Elaboratable):
    """Drains BBA TX FIFOs and forwards frames to W5500SPIMaster.

    TX FIFO read interfaces (async FIFOs, sync-domain read side)
    ---------------------------------------------------------------
    tx_bytes_r_data / tx_bytes_r_en / tx_bytes_r_rdy : byte stream
    tx_ctrl_r_data  / tx_ctrl_r_en  / tx_ctrl_r_rdy  : 16-bit frame length

    W5500 streaming output (sync domain, to W5500SPIMaster)
    -------------------------------------------------------
    tx_data / tx_valid / tx_ready / tx_sof / tx_eof

    CDC output (sync→exi, via PulseSynchronizer in BBATop)
    -------------------------------------------------------
    tx_irq : 1-cycle pulse when frame transmission is handed off to
             W5500SPIMaster (i.e. the byte flagged EOF has been accepted).

    Zero-length frames
    ------------------
    A length word of 0 is consumed from the tx_ctrl FIFO without touching the
    byte FIFO or the W5500 stream, and tx_irq is pulsed immediately.
    """

    def __init__(self):
        # tx_bytes FIFO read side
        self.tx_bytes_r_data = Signal(8)
        self.tx_bytes_r_en = Signal()
        self.tx_bytes_r_rdy = Signal()
        # tx_ctrl FIFO read side (frame length)
        self.tx_ctrl_r_data = Signal(16)
        self.tx_ctrl_r_en = Signal()
        self.tx_ctrl_r_rdy = Signal()
        # W5500 streaming TX interface
        self.tx_data = Signal(8)
        self.tx_valid = Signal()
        self.tx_ready = Signal()
        self.tx_sof = Signal()
        self.tx_eof = Signal()
        # TX done pulse → PulseSynchronizer
        self.tx_irq = Signal()

    def elaborate(self, platform):
        m = Module()

        frame_len = Signal(16)    # bytes still to LOAD from FIFO
        is_first = Signal()       # next byte loaded is the first (SOF)
        load_pending = Signal()   # 1-bit "more bytes to load" flag (replaces
                                  # a 16-bit frame_len != 0 compare in the
                                  # combinational FIFO read-enable path)

        # ── Registered holding stage presented to W5500 ──────────────────
        # All W5500-facing outputs are driven from these registers.  This
        # breaks the long combinational path that previously ran from the
        # tx_bytes FIFO read pointer, out through W5500 (tx_ready) and the
        # is_first/eof logic, and back into the FIFO pointer increment — the
        # sync-domain critical path.  The FIFO read-enable now depends only
        # on the registered hold_valid and the FIFO's own r_rdy.
        hold_data = Signal(8)
        hold_valid = Signal()
        hold_sof = Signal()
        hold_eof = Signal()

        m.d.sync += self.tx_irq.eq(0)   # default; overridden below to pulse

        m.d.comb += [
            self.tx_data.eq(hold_data),
            self.tx_valid.eq(hold_valid),
            self.tx_sof.eq(hold_sof),
            self.tx_eof.eq(hold_eof),
        ]

        # W5500 took the currently-held byte this cycle
        hold_consumed = Signal()
        m.d.comb += hold_consumed.eq(hold_valid & self.tx_ready)

        # FIFO read-enable defaults (combinational, no W5500 dependency)
        m.d.comb += self.tx_bytes_r_en.eq(0)
        m.d.comb += self.tx_ctrl_r_en.eq(0)

        with m.FSM(domain="sync", name="tx_fsm"):
            with m.State("IDLE"):
                # Wait for a complete frame length in tx_ctrl FIFO
                with m.If(self.tx_ctrl_r_rdy):
                    m.d.comb += self.tx_ctrl_r_en.eq(1)
                    with m.If(self.tx_ctrl_r_data == 0):
                        # Zero-length frame: there is no byte to load, hence
                        # no EOF byte whose acceptance could ever leave
                        # DRAIN.  Entering DRAIN here would hang the TX path
                        # forever, so consume the word and finish at once.
                        # NOTE(review): tx_irq is pulsed so the host's
                        # transmit handshake still completes — confirm this
                        # is the desired behaviour for empty frames.
                        m.d.sync += self.tx_irq.eq(1)
                    with m.Else():
                        m.d.sync += [
                            frame_len.eq(self.tx_ctrl_r_data),
                            is_first.eq(1),
                            load_pending.eq(1),
                        ]
                        m.next = "DRAIN"

            with m.State("DRAIN"):
                # Load the next byte into the holding register only when it
                # is empty.  Costs one idle sync cycle per byte, negligible
                # against the W5500 SPI rate (~16 sync cycles/byte), and
                # keeps tx_ready off the FIFO read-enable path entirely.
                #
                # The gate uses the registered 1-bit load_pending instead of
                # a 16-bit (frame_len != 0) reduction, so the combinational
                # path consume_r_gry → r_rdy → do_load → tx_bytes_r_en stays
                # shallow.
                do_load = Signal()
                m.d.comb += do_load.eq(
                    ~hold_valid & self.tx_bytes_r_rdy & load_pending
                )
                m.d.comb += self.tx_bytes_r_en.eq(do_load)

                with m.If(hold_consumed):
                    m.d.sync += hold_valid.eq(0)
                    with m.If(hold_eof):
                        # Final byte accepted: signal completion, next frame.
                        m.d.sync += self.tx_irq.eq(1)
                        m.next = "IDLE"

                with m.If(do_load):
                    m.d.sync += [
                        hold_data.eq(self.tx_bytes_r_data),
                        hold_valid.eq(1),
                        hold_sof.eq(is_first),
                        hold_eof.eq(frame_len == 1),
                        is_first.eq(0),
                        frame_len.eq(frame_len - 1),
                    ]
                    # Last byte just loaded → stop further loads (registered).
                    with m.If(frame_len == 1):
                        m.d.sync += load_pending.eq(0)

        return m
# ── Testbench ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    dut = TXFrameDrain()
    errors = []

    async def _send_frame(ctx, frame):
        """Push one frame through the DUT while emulating both FIFOs.

        Returns (received_bytes, seen_sof, seen_eof, saw_irq).

        tx_data/tx_sof/tx_eof are driven from the DUT's holding registers,
        so they are sampled before each clock tick, while tx_valid shows the
        byte currently on offer.  The emulated byte FIFO is advanced only
        after the tick, so the next byte is in place for the following load.
        """
        total = len(frame)

        # Offer the length word together with the first payload byte.
        ctx.set(dut.tx_ctrl_r_data, total)
        ctx.set(dut.tx_ctrl_r_rdy, 1)
        ctx.set(dut.tx_bytes_r_data, frame[0])
        ctx.set(dut.tx_bytes_r_rdy, 1)

        # First tick: the FSM pops the length word and moves to DRAIN.
        await ctx.tick("sync").repeat(1)
        # Withdraw the ctrl word so it is not popped again back in IDLE.
        ctx.set(dut.tx_ctrl_r_rdy, 0)

        got = []
        sof_flag = False
        eof_flag = False
        irq_flag = False

        budget = total + 10     # upper bound on loop iterations
        while budget > 0:
            budget -= 1

            # Sample the stream outputs ahead of the clock edge.
            if ctx.get(dut.tx_valid):
                got.append(ctx.get(dut.tx_data))
                sof_flag = sof_flag or ctx.get(dut.tx_sof)
                eof_flag = eof_flag or ctx.get(dut.tx_eof)

            await ctx.tick("sync").repeat(1)

            if ctx.get(dut.tx_irq):
                irq_flag = True
                break

            # After the edge: present the next byte, or report the byte FIFO
            # empty once the whole frame has been observed.
            if len(got) < total:
                ctx.set(dut.tx_bytes_r_data, frame[len(got)])
            elif len(got) == total:
                ctx.set(dut.tx_bytes_r_rdy, 0)

        return got, sof_flag, eof_flag, irq_flag

    async def testbench(ctx):
        await ctx.tick("sync").repeat(2)
        ctx.set(dut.tx_ready, 1)    # downstream always accepts

        # ── T1: 4-byte frame ─────────────────────────────────────────────
        frame = [0xDE, 0xAD, 0xBE, 0xEF]
        rx1, sof1, eof1, irq1 = await _send_frame(ctx, frame)
        print(f"T1 received={[hex(b) for b in rx1]} sof={sof1} eof={eof1} tx_irq={irq1}")
        if rx1 != frame:
            errors.append(f"T1 bytes mismatch: got {rx1}, want {frame}")
        if not sof1:
            errors.append("T1: SOF never seen")
        if not eof1:
            errors.append("T1: EOF never seen")
        if not irq1:
            errors.append("T1: tx_irq never pulsed")

        await ctx.tick("sync").repeat(4)

        # ── T2: single-byte frame — SOF and EOF on the same byte ─────────
        frame2 = [0x42]
        rx2, sof2, eof2, irq2 = await _send_frame(ctx, frame2)
        print(f"T2 byte=0x{rx2[0] if rx2 else 0:02X} sof={sof2} eof={eof2} tx_irq={irq2}")
        if rx2 != frame2:
            errors.append(f"T2: bytes wrong, got {rx2}")
        if not (sof2 and eof2):
            errors.append("T2: SOF+EOF both must be set for 1-byte frame")
        if not irq2:
            errors.append("T2: tx_irq not seen for 1-byte frame")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    with sim.write_vcd("TXFrameDrain.vcd"):
        sim.run()

    if not errors:
        print("\nAll tests passed.")
    else:
        print("\nFAILURES:")
        for failure in errors:
            print(" ", failure)
        sys.exit(1)
+840
View File
@@ -0,0 +1,840 @@
"""W5100 parallel-bus master — sync domain.
A drop-in alternative to `W5500SPIMaster` that talks to a WIZnet **W5100** over
its **indirect parallel bus** instead of SPI. The external streaming interface
(init_req/init_done/par, tx_*, rx_*) is identical, so BBATop wiring is unchanged;
only the physical pins differ (a parallel bus instead of 4 SPI wires).
Why parallel
------------
SPI serialises 8 bits per byte, so on this UP5K (whose W5500-operating logic
closes only ~40 MHz) the SPI byte rate caps at ~12 Mbit/s. A parallel bus moves
a whole byte per access, so the same ~24 MHz sync logic clears the 27 Mbit/s EXI
ceiling — the real hard limit — with margin. See CLAUDE.md.
W5100 indirect bus interface (IDM)
----------------------------------
Only two address lines A[1:0] are wired (the upper address lines are tied to 0
on the board, so a power-up *direct*-mode access at A=00 still lands on MR):
A[1:0] register
00 MR (Mode Register — also reachable directly at power-up)
01 IDM_AR0 (indirect address, high byte)
10 IDM_AR1 (indirect address, low byte)
11 IDM_DR (indirect data — accesses mem[IDM_AR]; auto-increments
IDM_AR when MR.AI is set)
So a register/buffer access is: write IDM_AR0/AR1 with the 16-bit address, then
read/write IDM_DR. With MR.AI=1 a multi-byte block is one address-set followed
by a burst of IDM_DR accesses (the chip auto-increments) — used for SHAR and for
streaming frame data.
A bus cycle drives A + (for writes) D with /CS and /RD or /WR asserted for
`strobe_cycles` sync clocks (≥ the W5100's ~80 ns access time at 24 MHz).
Phase status
------------
Phase 1 (this file): bus access engine + transaction engine + init sequence,
verified against a W5100 bus model. TX/RX MACRAW (with socket-buffer ring
wraparound) land in phases 2–3.
"""
from amaranth import *
__all__ = ["W5100ParallelMaster"]
# ── W5100 register addresses (indirect 16-bit address space) ────────────────
# Each value is a 16-bit chip address.  The transaction engine writes its
# high byte to IDM_AR0 and its low byte to IDM_AR1, then moves the payload
# through IDM_DR.
_MR = 0x0000 # Mode register (common)
_SHAR0 = 0x0009 # Source MAC, 6 bytes
_IR = 0x0015 # Interrupt register
_IMR = 0x0016 # Interrupt mask
_RMSR = 0x001A # RX memory size (2 bits/socket)
_TMSR = 0x001B # TX memory size
_S0_MR = 0x0400 # Socket 0 mode
_S0_CR = 0x0401 # Socket 0 command
_S0_IR = 0x0402 # Socket 0 interrupt
_S0_SR = 0x0403 # Socket 0 status
_S0_TX_FSR = 0x0420 # Socket 0 TX free size (2 bytes)
_S0_TX_RD = 0x0422 # Socket 0 TX read pointer
_S0_TX_WR = 0x0424 # Socket 0 TX write pointer
_S0_RX_RSR = 0x0426 # Socket 0 RX received size (2 bytes)
_S0_RX_RD = 0x0428 # Socket 0 RX read pointer
_TX_BASE = 0x4000 # Socket 0 TX buffer base (default 2 KB window)
_RX_BASE = 0x6000 # Socket 0 RX buffer base
_S0_TX_MASK = 0x07FF # 2 KB ring mask
_S0_RX_MASK = 0x07FF
# MR bits / command / mode values
# (byte values written to the registers above; the MR bits are OR-ed together)
_MR_RST = 0x80
_MR_AI = 0x02 # address auto-increment (indirect mode)
_MR_IND = 0x01 # indirect bus interface mode
_S0_MR_MACRAW = 0x04
_CR_OPEN = 0x01
_CR_SEND = 0x20
_CR_RECV = 0x40
# Indirect-mode address selects (A[1:0])
# These are the 2-bit values driven on the physical address pins for one bus
# cycle; they are not chip register addresses.
_A_MR = 0b00
_A_AR0 = 0b01 # IDM_AR high byte
_A_AR1 = 0b10 # IDM_AR low byte
_A_DR = 0b11 # IDM_DR (data)
class W5100ParallelMaster(Elaboratable):
"""W5100 master over the indirect parallel bus, sync clock domain.
Physical bus pins
-----------------
bus_addr : A[1:0] output
bus_data_o : D[7:0] output value (drive when bus_data_oe=1)
bus_data_oe: data-bus output enable (1=FPGA drives D, 0=W5100 drives D)
bus_data_i : D[7:0] input value (sampled during reads)
cs_n / rd_n / wr_n : bus control (active low)
w5100_int_n : W5100 INT_N input (active low)
w5100_rst_n : W5100 hardware reset (active low)
Init / TX / RX interfaces are identical to W5500SPIMaster.
"""
def __init__(self, strobe_cycles=3, reset_cycles=24000):
# /RD//WR strobe width in sync cycles (≥ W5100 access time).
self._strobe = strobe_cycles
# MR-reset settle wait; testbench overrides with a small value.
self._reset_cycles = reset_cycles
# Physical parallel bus
self.bus_addr = Signal(2)
self.bus_data_o = Signal(8)
self.bus_data_oe = Signal()
self.bus_data_i = Signal(8)
self.cs_n = Signal(init=1)
self.rd_n = Signal(init=1)
self.wr_n = Signal(init=1)
self.w5100_int_n = Signal(init=1)
self.w5100_rst_n = Signal(init=1)
# Init control
self.init_req = Signal()
self.init_done = Signal()
self.par = Signal(48) # MAC address (PAR0..5 packed)
# TX stream
self.tx_data = Signal(8)
self.tx_valid = Signal()
self.tx_ready = Signal()
self.tx_sof = Signal()
self.tx_eof = Signal()
# RX stream
self.rx_data = Signal(8)
self.rx_valid = Signal()
self.rx_ready = Signal()
self.rx_sof = Signal()
self.rx_eof = Signal()
def elaborate(self, platform):
m = Module()
STROBE = self._strobe
# ── Bus access engine: one indirect-bus read or write cycle ──────────
bus_go = Signal()
bus_rw = Signal() # 1 = write, 0 = read
bus_a = Signal(2)
bus_wdata = Signal(8)
bus_rdata = Signal(8)
bus_done = Signal()
bus_ctr = Signal(range(STROBE + 2))
rw_r = Signal()
# registered physical outputs
a_o = Signal(2)
d_o = Signal(8)
d_oe = Signal()
cs_r = Signal(init=1)
rd_r = Signal(init=1)
wr_r = Signal(init=1)
m.d.comb += [
self.bus_addr .eq(a_o),
self.bus_data_o .eq(d_o),
self.bus_data_oe.eq(d_oe),
self.cs_n .eq(cs_r),
self.rd_n .eq(rd_r),
self.wr_n .eq(wr_r),
]
m.d.sync += bus_done.eq(0)
with m.FSM(domain="sync", name="bus_fsm"):
with m.State("IDLE"):
m.d.sync += [cs_r.eq(1), rd_r.eq(1), wr_r.eq(1), d_oe.eq(0)]
with m.If(bus_go):
m.d.sync += [a_o.eq(bus_a), rw_r.eq(bus_rw),
cs_r.eq(0), bus_ctr.eq(0)]
with m.If(bus_rw):
m.d.sync += [d_o.eq(bus_wdata), d_oe.eq(1), wr_r.eq(0)]
with m.Else():
m.d.sync += rd_r.eq(0)
m.next = "STROBE"
with m.State("STROBE"):
m.d.sync += bus_ctr.eq(bus_ctr + 1)
with m.If(bus_ctr == STROBE - 1):
with m.If(~rw_r):
m.d.sync += bus_rdata.eq(self.bus_data_i) # sample read
m.d.sync += [rd_r.eq(1), wr_r.eq(1)]
m.next = "FINISH"
with m.State("FINISH"):
m.d.sync += [cs_r.eq(1), d_oe.eq(0), bus_done.eq(1)]
m.next = "IDLE"
# ── Transaction engine: address-set + payload over the bus engine ────
WBUF = 8
xfer_start = Signal()
xfer_direct = Signal() # 1 = single A=00 access (MR), addr ignored
xfer_addr = Signal(16)
xfer_rw = Signal() # payload direction: 1=write, 0=read
xfer_len = Signal(range(WBUF + 1))
xfer_stream = Signal() # stream-write payload from s_*
xfer_sread = Signal() # stream-read payload to r_*
xfer_rcount = Signal(16)
xfer_done = Signal()
wbuf = Array([Signal(8, name=f"wbuf{i}") for i in range(WBUF)])
rbuf = Array([Signal(8, name=f"rbuf{i}") for i in range(WBUF)])
s_count = Signal(16) # bytes streamed-written (advances pointers)
xfer_idx = Signal(range(WBUF + 1))
s_last_r = Signal()
r_idx = Signal(16)
# Streaming payload interfaces.
s_data, s_valid, s_last, s_consume = Signal(8), Signal(), Signal(), Signal()
r_data, r_valid, r_first, r_last, r_ready = (
Signal(8), Signal(), Signal(), Signal(), Signal())
# TX stream source = external tx interface (Phase 2).
m.d.comb += [s_data.eq(self.tx_data), s_valid.eq(self.tx_valid),
s_last.eq(self.tx_eof), self.tx_ready.eq(s_consume)]
# RX stream sink = external rx interface (Phase 3).
m.d.comb += [self.rx_data.eq(r_data), self.rx_valid.eq(r_valid),
self.rx_sof.eq(r_first), self.rx_eof.eq(r_last),
r_ready.eq(self.rx_ready)]
# Socket-buffer ring wraparound. Unlike the W5500, the W5100's IDM
# address does NOT auto-wrap at the socket-buffer boundary — it just
# increments linearly into the next region. So when a streamed access
# reaches `xfer_wend`, the engine re-sets IDM_AR back to `xfer_wbase`.
xfer_wrap = Signal()
xfer_wbase = Signal(16)
xfer_wend = Signal(16)
cur_addr = Signal(16)
m.d.comb += [bus_go.eq(0), bus_rw.eq(0), bus_a.eq(0), bus_wdata.eq(0)]
m.d.comb += [s_consume.eq(0), r_valid.eq(0), r_data.eq(0),
r_first.eq(0), r_last.eq(0)]
m.d.sync += xfer_done.eq(0)
def bus_write(a, data):
m.d.comb += [bus_go.eq(1), bus_rw.eq(1), bus_a.eq(a), bus_wdata.eq(data)]
def bus_read(a):
m.d.comb += [bus_go.eq(1), bus_rw.eq(0), bus_a.eq(a)]
with m.FSM(domain="sync", name="xfer_fsm"):
with m.State("IDLE"):
with m.If(xfer_start):
m.d.sync += [xfer_idx.eq(0), s_count.eq(0), r_idx.eq(0),
cur_addr.eq(xfer_addr)]
with m.If(xfer_direct):
m.next = "DIRECT"
with m.Else():
m.next = "AR_HI"
# Direct MR write (A=00)
with m.State("DIRECT"):
bus_write(_A_MR, wbuf[0])
m.next = "DIRECT_W"
with m.State("DIRECT_W"):
with m.If(bus_done):
m.next = "FINISH"
# Set indirect address IDM_AR (high then low)
with m.State("AR_HI"):
bus_write(_A_AR0, xfer_addr[8:16])
m.next = "AR_HI_W"
with m.State("AR_HI_W"):
with m.If(bus_done):
m.next = "AR_LO"
with m.State("AR_LO"):
bus_write(_A_AR1, xfer_addr[0:8])
m.next = "AR_LO_W"
with m.State("AR_LO_W"):
with m.If(bus_done):
with m.If(xfer_stream):
m.next = "SW_LOAD"
with m.Elif(xfer_sread):
m.next = "SR_LOAD"
with m.Elif(xfer_rw):
m.next = "WB_ISSUE"
with m.Else():
m.next = "RB_ISSUE"
# Fixed-length write from wbuf (IDM_DR burst, auto-increment)
with m.State("WB_ISSUE"):
bus_write(_A_DR, wbuf[xfer_idx])
m.next = "WB_WAIT"
with m.State("WB_WAIT"):
with m.If(bus_done):
m.d.sync += xfer_idx.eq(xfer_idx + 1)
with m.If(xfer_idx + 1 == xfer_len):
m.next = "FINISH"
with m.Else():
m.next = "WB_ISSUE"
# Fixed-length read into rbuf (with ring wrap, for the length header)
with m.State("RB_ISSUE"):
with m.If(xfer_wrap & (cur_addr == xfer_wend)):
m.next = "RB_WRAP_HI"
with m.Else():
bus_read(_A_DR)
m.next = "RB_WAIT"
with m.State("RB_WAIT"):
with m.If(bus_done):
m.d.sync += rbuf[xfer_idx].eq(bus_rdata)
m.d.sync += [xfer_idx.eq(xfer_idx + 1), cur_addr.eq(cur_addr + 1)]
with m.If(xfer_idx + 1 == xfer_len):
m.next = "FINISH"
with m.Else():
m.next = "RB_ISSUE"
with m.State("RB_WRAP_HI"):
bus_write(_A_AR0, xfer_wbase[8:16])
m.next = "RB_WRAP_HI_W"
with m.State("RB_WRAP_HI_W"):
with m.If(bus_done):
m.next = "RB_WRAP_LO"
with m.State("RB_WRAP_LO"):
bus_write(_A_AR1, xfer_wbase[0:8])
m.next = "RB_WRAP_LO_W"
with m.State("RB_WRAP_LO_W"):
with m.If(bus_done):
m.d.sync += cur_addr.eq(xfer_wbase)
m.next = "RB_ISSUE"
# Stream-write payload from s_* until s_last (with ring wrap)
with m.State("SW_LOAD"):
with m.If(xfer_wrap & (cur_addr == xfer_wend)):
m.next = "SW_WRAP_HI"
with m.Elif(s_valid):
bus_write(_A_DR, s_data)
m.d.sync += s_last_r.eq(s_last)
m.next = "SW_WAIT"
with m.State("SW_WAIT"):
with m.If(bus_done):
m.d.comb += s_consume.eq(1)
m.d.sync += [s_count.eq(s_count + 1), cur_addr.eq(cur_addr + 1)]
with m.If(s_last_r):
m.next = "FINISH"
with m.Else():
m.next = "SW_LOAD"
with m.State("SW_WRAP_HI"):
bus_write(_A_AR0, xfer_wbase[8:16])
m.next = "SW_WRAP_HI_W"
with m.State("SW_WRAP_HI_W"):
with m.If(bus_done):
m.next = "SW_WRAP_LO"
with m.State("SW_WRAP_LO"):
bus_write(_A_AR1, xfer_wbase[0:8])
m.next = "SW_WRAP_LO_W"
with m.State("SW_WRAP_LO_W"):
with m.If(bus_done):
m.d.sync += cur_addr.eq(xfer_wbase)
m.next = "SW_LOAD"
# Stream-read payload to r_* for rcount bytes (with ring wrap)
with m.State("SR_LOAD"):
with m.If(r_idx == xfer_rcount):
m.next = "FINISH"
with m.Elif(xfer_wrap & (cur_addr == xfer_wend)):
m.next = "SR_WRAP_HI"
with m.Else():
bus_read(_A_DR)
m.next = "SR_WAIT"
with m.State("SR_WAIT"):
with m.If(bus_done):
m.next = "SR_PUSH"
with m.State("SR_PUSH"):
m.d.comb += [r_data.eq(bus_rdata), r_valid.eq(1),
r_first.eq(r_idx == 0),
r_last.eq(r_idx + 1 == xfer_rcount)]
with m.If(r_ready):
m.d.sync += [r_idx.eq(r_idx + 1), cur_addr.eq(cur_addr + 1)]
m.next = "SR_LOAD"
with m.State("SR_WRAP_HI"):
bus_write(_A_AR0, xfer_wbase[8:16])
m.next = "SR_WRAP_HI_W"
with m.State("SR_WRAP_HI_W"):
with m.If(bus_done):
m.next = "SR_WRAP_LO"
with m.State("SR_WRAP_LO"):
bus_write(_A_AR1, xfer_wbase[0:8])
m.next = "SR_WRAP_LO_W"
with m.State("SR_WRAP_LO_W"):
with m.If(bus_done):
m.d.sync += cur_addr.eq(xfer_wbase)
m.next = "SR_LOAD"
with m.State("FINISH"):
m.d.sync += xfer_done.eq(1)
m.next = "IDLE"
# ── Control regs ─────────────────────────────────────────────────────
mac_shadow = Array([Signal(8, name=f"mac{i}") for i in range(6)])
wait_ctr = Signal(range(self._reset_cycles + 2))
tx_wr = Signal(16)
rx_rsr = Signal(16)
rx_rd = Signal(16)
pkt_len = Signal(16)
def write_reg(name, addr, payload, nxt, direct=False):
"""Emit a 2-state block that writes `payload` (a list) to `addr`."""
with m.State(name):
m.d.sync += [xfer_addr.eq(addr), xfer_rw.eq(1),
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
xfer_direct.eq(1 if direct else 0),
xfer_len.eq(len(payload))]
for i, b in enumerate(payload):
m.d.sync += wbuf[i].eq(b)
m.d.sync += xfer_start.eq(1)
m.next = name + "_W"
with m.State(name + "_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.next = nxt
# ── Main control FSM (Phase 1: init only) ────────────────────────────
with m.FSM(domain="sync", name="main_fsm"):
with m.State("IDLE"):
m.d.sync += self.init_done.eq(0)
with m.If(self.init_req):
for i in range(6):
m.d.sync += mac_shadow[i].eq(self.par[i*8:(i+1)*8])
m.next = "MR_RST"
with m.Elif(~self.w5100_int_n):
m.next = "RX_CHECK"
with m.Elif(self.tx_valid & self.tx_sof):
m.next = "TX_START"
# MR = 0x80 software reset (direct A=00), then settle.
write_reg("MR_RST", _MR, [_MR_RST], "MR_WAIT", direct=True)
with m.State("MR_WAIT"):
with m.If(wait_ctr == self._reset_cycles):
m.d.sync += wait_ctr.eq(0)
m.next = "MR_MODE"
with m.Else():
m.d.sync += wait_ctr.eq(wait_ctr + 1)
# MR = indirect + auto-increment (direct A=00).
write_reg("MR_MODE", _MR, [_MR_IND | _MR_AI], "SHAR", direct=True)
# SHAR = source MAC (6-byte auto-increment burst).
with m.State("SHAR"):
m.d.sync += [xfer_addr.eq(_SHAR0), xfer_rw.eq(1),
xfer_stream.eq(0), xfer_sread.eq(0),
xfer_direct.eq(0), xfer_len.eq(6)]
for i in range(6):
m.d.sync += wbuf[i].eq(mac_shadow[i])
m.d.sync += xfer_start.eq(1)
m.next = "SHAR_W"
with m.State("SHAR_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.next = "MEMSZ"
# RMSR/TMSR = 0x55 (2 KB per socket — default; socket 0 used).
write_reg("MEMSZ", _RMSR, [0x55, 0x55], "S0_MODE") # RMSR then TMSR
# Socket 0: MACRAW mode, OPEN, enable interrupt.
write_reg("S0_MODE", _S0_MR, [_S0_MR_MACRAW], "S0_OPEN")
write_reg("S0_OPEN", _S0_CR, [_CR_OPEN], "S0_IMR")
write_reg("S0_IMR", _IMR, [0x01], "INIT_DONE") # enable S0 IRQ
with m.State("INIT_DONE"):
m.d.sync += self.init_done.eq(1)
m.next = "IDLE"
# ── TX MACRAW ────────────────────────────────────────────────────
# read S0_TX_WR → stream frame into the TX buffer at that offset
# (ring-wrapping at the 2 KB boundary) → advance S0_TX_WR → SEND.
with m.State("TX_START"): # read S0_TX_WR (2 bytes)
m.d.sync += [xfer_addr.eq(_S0_TX_WR), xfer_rw.eq(0),
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
xfer_direct.eq(0), xfer_len.eq(2)]
m.d.sync += xfer_start.eq(1)
m.next = "TX_RDPTR_W"
with m.State("TX_RDPTR_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.d.sync += tx_wr.eq(Cat(rbuf[1], rbuf[0])) # big-endian
m.next = "TX_DATA"
with m.State("TX_DATA"): # stream frame → TX buffer
m.d.sync += [xfer_addr.eq(_TX_BASE + (tx_wr & _S0_TX_MASK)),
xfer_rw.eq(1), xfer_stream.eq(1), xfer_sread.eq(0),
xfer_direct.eq(0), xfer_wrap.eq(1),
xfer_wbase.eq(_TX_BASE),
xfer_wend.eq(_TX_BASE + _S0_TX_MASK + 1)]
m.d.sync += xfer_start.eq(1)
m.next = "TX_DATA_W"
with m.State("TX_DATA_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.d.sync += [xfer_stream.eq(0), xfer_wrap.eq(0),
tx_wr.eq(tx_wr + s_count)] # advanced pointer
m.next = "TX_UPDPTR"
with m.State("TX_UPDPTR"): # write back S0_TX_WR
m.d.sync += [xfer_addr.eq(_S0_TX_WR), xfer_rw.eq(1),
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
xfer_direct.eq(0), xfer_len.eq(2)]
m.d.sync += [wbuf[0].eq(tx_wr[8:16]), wbuf[1].eq(tx_wr[0:8])]
m.d.sync += xfer_start.eq(1)
m.next = "TX_UPDPTR_W"
with m.State("TX_UPDPTR_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.next = "TX_SEND"
# S0_CR = SEND
write_reg("TX_SEND", _S0_CR, [_CR_SEND], "IDLE")
# ── RX MACRAW ────────────────────────────────────────────────────
# On W5100 INT: read RX_RSR; if non-zero read RX_RD, read the 2-byte
# MACRAW length, stream (length - 2) frame bytes out (ring-wrapping),
# advance RX_RD by the length, issue RECV, clear the RECV interrupt.
with m.State("RX_CHECK"): # read S0_RX_RSR (2 bytes)
m.d.sync += [xfer_addr.eq(_S0_RX_RSR), xfer_rw.eq(0),
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
xfer_direct.eq(0), xfer_len.eq(2)]
m.d.sync += xfer_start.eq(1)
m.next = "RX_RSR_W"
with m.State("RX_RSR_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.d.sync += rx_rsr.eq(Cat(rbuf[1], rbuf[0]))
m.next = "RX_RSR_CHK"
with m.State("RX_RSR_CHK"):
with m.If(rx_rsr == 0):
m.next = "IDLE" # nothing received
with m.Else():
m.next = "RX_RDPTR"
with m.State("RX_RDPTR"): # read S0_RX_RD (2 bytes)
m.d.sync += [xfer_addr.eq(_S0_RX_RD), xfer_rw.eq(0),
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
xfer_direct.eq(0), xfer_len.eq(2)]
m.d.sync += xfer_start.eq(1)
m.next = "RX_RDPTR_W"
with m.State("RX_RDPTR_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.d.sync += rx_rd.eq(Cat(rbuf[1], rbuf[0]))
m.next = "RX_LEN"
with m.State("RX_LEN"): # read 2-byte MACRAW length (wrap)
m.d.sync += [xfer_addr.eq(_RX_BASE + (rx_rd & _S0_RX_MASK)),
xfer_rw.eq(0), xfer_stream.eq(0), xfer_sread.eq(0),
xfer_direct.eq(0), xfer_len.eq(2), xfer_wrap.eq(1),
xfer_wbase.eq(_RX_BASE),
xfer_wend.eq(_RX_BASE + _S0_RX_MASK + 1)]
m.d.sync += xfer_start.eq(1)
m.next = "RX_LEN_W"
with m.State("RX_LEN_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.d.sync += pkt_len.eq(Cat(rbuf[1], rbuf[0]))
m.next = "RX_FRAME"
with m.State("RX_FRAME"): # stream (pkt_len2) frame bytes
m.d.sync += [xfer_addr.eq(_RX_BASE + ((rx_rd + 2) & _S0_RX_MASK)),
xfer_rw.eq(0), xfer_stream.eq(0), xfer_sread.eq(1),
xfer_direct.eq(0), xfer_rcount.eq(pkt_len - 2),
xfer_wrap.eq(1), xfer_wbase.eq(_RX_BASE),
xfer_wend.eq(_RX_BASE + _S0_RX_MASK + 1)]
m.d.sync += xfer_start.eq(1)
m.next = "RX_FRAME_W"
with m.State("RX_FRAME_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.d.sync += [xfer_sread.eq(0), xfer_wrap.eq(0)]
m.next = "RX_UPDRD"
with m.State("RX_UPDRD"): # S0_RX_RD += pkt_len, write back
m.d.sync += [xfer_addr.eq(_S0_RX_RD), xfer_rw.eq(1),
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
xfer_direct.eq(0), xfer_len.eq(2)]
m.d.sync += [wbuf[0].eq((rx_rd + pkt_len)[8:16]),
wbuf[1].eq((rx_rd + pkt_len)[0:8])]
m.d.sync += xfer_start.eq(1)
m.next = "RX_UPDRD_W"
with m.State("RX_UPDRD_W"):
m.d.sync += xfer_start.eq(0)
with m.If(xfer_done):
m.next = "RX_RECV"
# S0_CR = RECV, then clear the RECV interrupt bit (S0_IR[2]).
write_reg("RX_RECV", _S0_CR, [_CR_RECV], "RX_CLR_IR")
write_reg("RX_CLR_IR", _S0_IR, [0x04], "IDLE")
return m
# ── Testbench ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Self-checking simulation of W5100ParallelMaster against a behavioural
    # model of the W5100 indirect bus.  Failures are collected in `errors`
    # and reported at the end; the process exits 1 if any were recorded.
    import sys
    from amaranth.sim import Simulator, Period

    dut = W5100ParallelMaster(strobe_cycles=3, reset_cycles=10)
    errors = []
    MAC = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66]
    PAR = sum(b << (8 * i) for i, b in enumerate(MAC))

    # Expected indirect-address writes captured by the model (addr, value).
    # MR is written directly (A=00) → captured as ('MR', value).
    EXPECTED = [
        ("MR", _MR_RST),
        ("MR", _MR_IND | _MR_AI),
        (_SHAR0 + 0, MAC[0]), (_SHAR0 + 1, MAC[1]), (_SHAR0 + 2, MAC[2]),
        (_SHAR0 + 3, MAC[3]), (_SHAR0 + 4, MAC[4]), (_SHAR0 + 5, MAC[5]),
        (_RMSR + 0, 0x55), (_RMSR + 1, 0x55),
        (_S0_MR, _S0_MR_MACRAW),
        (_S0_CR, _CR_OPEN),
        (_IMR, 0x01),
    ]
    writes = []     # captured (addr-or-'MR', value) — IDM_DR + MR writes
    model_mem = {}  # W5100 memory image (registers + TX/RX buffers)

    def hex_or_none(b):
        """Format a byte as '0xNN'; a missing byte (None) prints as 'None'
        instead of raising inside the status print."""
        return "None" if b is None else "0x%02X" % b

    async def w5100_model(ctx):
        """W5100 indirect-bus slave model: tracks MR/IDM_AR, records IDM_DR and
        MR writes, and drives bus_data_i for reads. Mode-0 timing: a write is
        latched on /WR rising while /CS low; reads driven while /RD low."""
        idm_ar = 0
        mr = 0
        prev_cs = prev_rd = prev_wr = 1
        async for vals in ctx.tick("sync").sample(
                dut.cs_n, dut.rd_n, dut.wr_n,
                dut.bus_addr, dut.bus_data_o, dut.bus_data_oe):
            cs, rd, wr, a, do, doe = vals[-6:]
            ai = (mr >> 1) & 1  # MR.AI
            # Drive read data while /RD asserted (combinational, before sample).
            if cs == 0 and rd == 0:
                if a == _A_MR:
                    val = mr
                elif a == _A_AR0:
                    val = (idm_ar >> 8) & 0xFF
                elif a == _A_AR1:
                    val = idm_ar & 0xFF
                else:
                    val = model_mem.get(idm_ar, 0)
                ctx.set(dut.bus_data_i, val)
            # Latch write on /WR rising edge.
            if cs == 0 and prev_wr == 0 and wr == 1:
                if a == _A_MR:
                    mr = do
                    writes.append(("MR", do))
                elif a == _A_AR0:
                    idm_ar = (idm_ar & 0x00FF) | (do << 8)
                elif a == _A_AR1:
                    idm_ar = (idm_ar & 0xFF00) | do
                else:  # IDM_DR
                    model_mem[idm_ar] = do
                    writes.append((idm_ar, do))
                    # RECV command consumes the RX data: clear RSR (mirrors HW).
                    if idm_ar == _S0_CR and do == _CR_RECV:
                        model_mem[_S0_RX_RSR] = 0
                        model_mem[_S0_RX_RSR + 1] = 0
                    if ai:
                        idm_ar = (idm_ar + 1) & 0xFFFF
            # Auto-increment after a data read (/RD rising, A=DR).
            if cs == 0 and prev_rd == 0 and rd == 1 and a == _A_DR and ai:
                idm_ar = (idm_ar + 1) & 0xFFFF
            prev_cs, prev_rd, prev_wr = cs, rd, wr

    async def testbench(ctx):
        """Run T1 (init), T2/T3 (TX, TX with ring wrap) and T4/T5 (RX, RX with
        ring wrap), appending a message to `errors` for every failed check."""
        ctx.set(dut.par, PAR)
        await ctx.tick("sync").repeat(2)

        # T1: trigger init, wait for init_done.
        ctx.set(dut.init_req, 1)
        await ctx.tick("sync").repeat(1)
        ctx.set(dut.init_req, 0)
        done = False
        for _ in range(4000):
            await ctx.tick("sync").repeat(1)
            if ctx.get(dut.init_done):
                done = True
                break
        if not done:
            errors.append("init_done never asserted")
        print(f"T1 init captured {len(writes)} writes")
        if writes != EXPECTED:
            errors.append("init write sequence mismatch")
            for i in range(max(len(writes), len(EXPECTED))):
                g = writes[i] if i < len(writes) else None
                e = EXPECTED[i] if i < len(EXPECTED) else None
                mark = "" if g == e else " <-- MISMATCH"
                gs = f"({g[0]:#06x},{g[1]:#04x})" if g and isinstance(g[0], int) else str(g)
                es = f"({e[0]:#06x},{e[1]:#04x})" if e and isinstance(e[0], int) else str(e)
                print(f" [{i:2}] got {gs:20} exp {es:20}{mark}")
        else:
            print("T1 init sequence matches expected (MR, SHAR, mem sizes, "
                  "S0 MACRAW/OPEN, IMR)")

        # ── helper: stream one TX frame through the external tx interface ─────
        async def feed_frame(ctx, frame):
            """Present `frame` byte by byte, holding each byte until tx_ready
            is observed, then wait for the SEND command to reach the model.

            If a byte is not consumed within 400 cycles an error is recorded
            and the frame is abandoned; the tx_* inputs are released in that
            case too, so a stuck TX request cannot leak into later tests.
            """
            aborted = False
            for i, b in enumerate(frame):
                ctx.set(dut.tx_data, b)
                ctx.set(dut.tx_valid, 1)
                ctx.set(dut.tx_sof, 1 if i == 0 else 0)
                ctx.set(dut.tx_eof, 1 if i == len(frame) - 1 else 0)
                got = False
                for _ in range(400):
                    await ctx.tick("sync").repeat(1)
                    if ctx.get(dut.tx_ready):
                        got = True
                        break
                if not got:
                    errors.append(f"feed_frame: byte {i} never consumed")
                    aborted = True
                    break
            # Always release the stream inputs, on success and on abort.
            ctx.set(dut.tx_valid, 0)
            ctx.set(dut.tx_sof, 0)
            ctx.set(dut.tx_eof, 0)
            if aborted:
                return
            # let TX_UPDPTR + SEND complete
            for _ in range(200):
                await ctx.tick("sync").repeat(1)
                if model_mem.get(_S0_CR) == _CR_SEND:
                    break

        # ── T2: TX MACRAW frame (TX_WR=0, no wrap) ───────────────────────────
        FRAME = [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x10, 0x20]
        await feed_frame(ctx, FRAME)
        buf = [model_mem.get(_TX_BASE + i, None) for i in range(len(FRAME))]
        if buf != FRAME:
            errors.append(f"T2 TX buffer mismatch: {buf} != {FRAME}")
        tx_wr_hi = model_mem.get(_S0_TX_WR, 0)
        tx_wr_lo = model_mem.get(_S0_TX_WR + 1, 0)
        adv = (tx_wr_hi << 8) | tx_wr_lo
        if adv != len(FRAME):
            errors.append(f"T2 S0_TX_WR advance: got {adv}, want {len(FRAME)}")
        if model_mem.get(_S0_CR) != _CR_SEND:
            errors.append("T2 SEND command not issued")
        # hex_or_none: buf holds None for any byte the DUT never wrote.
        print(f"T2 TX: buffer={[hex_or_none(b) for b in buf]} "
              f"TX_WR={adv} SEND={model_mem.get(_S0_CR)==_CR_SEND}")

        # ── T3: TX MACRAW with ring wraparound (TX_WR near 2 KB boundary) ─────
        # Pre-load S0_TX_WR = 0x07FE so a 6-byte frame straddles the boundary:
        # offsets 0x7FE,0x7FF then wraps to 0x000,0x001,0x002,0x003.
        model_mem[_S0_TX_WR] = 0x07
        model_mem[_S0_TX_WR + 1] = 0xFE
        model_mem[_S0_CR] = 0x00  # clear so we can detect the new SEND
        WFRAME = [0x41, 0x42, 0x43, 0x44, 0x45, 0x46]
        await feed_frame(ctx, WFRAME)
        # expected physical layout
        exp = {
            _TX_BASE + 0x7FE: WFRAME[0],
            _TX_BASE + 0x7FF: WFRAME[1],
            _TX_BASE + 0x000: WFRAME[2],
            _TX_BASE + 0x001: WFRAME[3],
            _TX_BASE + 0x002: WFRAME[4],
            _TX_BASE + 0x003: WFRAME[5],
        }
        for addr, want in exp.items():
            got = model_mem.get(addr)
            if got != want:
                errors.append(f"T3 wrap byte @0x{addr:04X}: got {got}, want 0x{want:02X}")
        adv2 = (model_mem.get(_S0_TX_WR, 0) << 8) | model_mem.get(_S0_TX_WR + 1, 0)
        want_wr = (0x07FE + len(WFRAME)) & 0xFFFF
        if adv2 != want_wr:
            errors.append(f"T3 wrap S0_TX_WR: got 0x{adv2:04X}, want 0x{want_wr:04X}")
        # S0_CR was cleared above, so this proves a fresh SEND for this frame.
        if model_mem.get(_S0_CR) != _CR_SEND:
            errors.append("T3 SEND command not issued")
        ok = all(model_mem.get(a) == v for a, v in exp.items())
        print(f"T3 TX wrap: bytes_placed_ok={ok} TX_WR=0x{adv2:04X} (want 0x{want_wr:04X})")

        # ── helper: drive an RX event and collect the streamed-out frame ─────
        def load_rx(rx_rd_off, frame):
            """Place a MACRAW packet [len_hi,len_lo,frame...] in the RX buffer at
            offset rx_rd_off (ring), set RX_RSR/RX_RD, return the 16-bit length."""
            plen = len(frame) + 2
            payload = [(plen >> 8) & 0xFF, plen & 0xFF] + list(frame)
            for i, b in enumerate(payload):
                off = (rx_rd_off + i) & _S0_RX_MASK
                model_mem[_RX_BASE + off] = b
            model_mem[_S0_RX_RSR] = (plen >> 8) & 0xFF
            model_mem[_S0_RX_RSR + 1] = plen & 0xFF
            model_mem[_S0_RX_RD] = (rx_rd_off >> 8) & 0xFF
            model_mem[_S0_RX_RD + 1] = rx_rd_off & 0xFF
            return plen

        async def do_rx(ctx, rx_rd_off, frame):
            """Load `frame` into the model's RX ring, assert the interrupt and
            collect every byte handed out on the rx_* stream until the model
            sees RECV (or 1500 cycles pass). Returns (collected bytes, plen)."""
            plen = load_rx(rx_rd_off, frame)
            ctx.set(dut.rx_ready, 1)
            collected = []
            ctx.set(dut.w5100_int_n, 0)  # assert RX interrupt
            for _ in range(1500):
                await ctx.tick("sync").repeat(1)
                if ctx.get(dut.rx_valid) and ctx.get(dut.rx_ready):
                    collected.append(ctx.get(dut.rx_data))
                if model_mem.get(_S0_CR) == _CR_RECV:
                    break
            ctx.set(dut.w5100_int_n, 1)  # deassert; let it finish + idle
            for _ in range(300):
                await ctx.tick("sync").repeat(1)
            ctx.set(dut.rx_ready, 0)
            return collected, plen

        # ── T4: RX MACRAW frame (RX_RD=0, no wrap) ───────────────────────────
        model_mem[_S0_CR] = 0x00
        RX_FRAME = [0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03]
        got, plen = await do_rx(ctx, 0x0000, RX_FRAME)
        if got != RX_FRAME:
            errors.append(f"T4 RX frame mismatch: {['0x%02X'%b for b in got]} != "
                          f"{['0x%02X'%b for b in RX_FRAME]}")
        new_rd = (model_mem.get(_S0_RX_RD, 0) << 8) | model_mem.get(_S0_RX_RD + 1, 0)
        if new_rd != plen:
            errors.append(f"T4 RX_RD advance: got 0x{new_rd:04X}, want 0x{plen:04X}")
        # Fail (not just print) when the RECV command never reached the model.
        if model_mem.get(_S0_CR) != _CR_RECV:
            errors.append("T4 RECV command not issued")
        print(f"T4 RX: frame={['0x%02X'%b for b in got]} RX_RD=0x{new_rd:04X} "
              f"RECV={model_mem.get(_S0_CR)==_CR_RECV}")

        # ── T5: RX MACRAW with ring wraparound (RX_RD near 2 KB boundary) ─────
        model_mem[_S0_CR] = 0x00
        RX_FRAME2 = [0x51, 0x52, 0x53, 0x54, 0x55]
        # rx_rd = 0x07FD: [len_hi@7FD][len_lo@7FE][f0@7FF][f1@000][f2@001]...
        got2, plen2 = await do_rx(ctx, 0x07FD, RX_FRAME2)
        if got2 != RX_FRAME2:
            errors.append(f"T5 RX wrap frame mismatch: {['0x%02X'%b for b in got2]} != "
                          f"{['0x%02X'%b for b in RX_FRAME2]}")
        new_rd2 = (model_mem.get(_S0_RX_RD, 0) << 8) | model_mem.get(_S0_RX_RD + 1, 0)
        want_rd2 = (0x07FD + plen2) & 0xFFFF
        if new_rd2 != want_rd2:
            errors.append(f"T5 RX wrap RX_RD: got 0x{new_rd2:04X}, want 0x{want_rd2:04X}")
        if model_mem.get(_S0_CR) != _CR_RECV:
            errors.append("T5 RECV command not issued")
        print(f"T5 RX wrap: frame={['0x%02X'%b for b in got2]} "
              f"RX_RD=0x{new_rd2:04X} (want 0x{want_rd2:04X})")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    sim.add_process(w5100_model)
    sim.run()

    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
    else:
        print("\nAll tests passed.")
+760
View File
@@ -0,0 +1,760 @@
"""W5500 SPI master — sync domain (24 MHz).
SPI Mode 0 (CPOL=0, CPHA=0): CLK idles LOW, data captured on rising edge.
SCK = 12 MHz: the sync domain is 24 MHz and the bit engine toggles SCK via a
clock-enable (sync ÷ 2).
W5500 frame format
------------------
Byte 0-1 Address (16-bit big-endian)
Byte 2 Control: [7:3]=BSB [2]=R/W [1:0]=OM
Byte 3+ Data
BSB values used here:
0b00000 Common registers
0b00001 Socket 0 registers
0b00010 Socket 0 TX buffer
0b00011 Socket 0 RX buffer
After NCRA reset the driver issues the W5500 init sequence (MR reset, SHAR,
S0_MR MACRAW, S0_CR OPEN, S0_IMR).
The module provides:
- A streaming TX interface (tx_data/tx_valid/tx_ready + sof/eof framing)
- A streaming RX interface (rx_data/rx_valid/rx_ready + sof/eof)
- init_req / init_done for the NCRA-triggered init sequence
- MAC source address shadow input (par[0..5]) for SHAR programming
"""
from amaranth import *
__all__ = ["W5500SPIMaster"]
# W5500 register addresses. The 16-bit address is the OFFSET WITHIN A BLOCK;
# the block is selected by the BSB field of the control byte (see _CTRL_*),
# NOT by the address. So socket-0 registers use small offsets with BSB=1.
# Because the block is not part of the address, _W5500_MR and _W5500_S0_MR
# share the value 0x0000 and are told apart only by the control byte.
_W5500_MR = 0x0000 # Mode register (common block)
_W5500_SHAR = 0x0009 # Source MAC, 6 bytes (common block)
_W5500_S0_MR = 0x0000 # Socket 0 Mode (socket-0 block)
_W5500_S0_CR = 0x0001 # Socket 0 Command
_W5500_S0_IR = 0x0002 # Socket 0 Interrupt
_W5500_S0_RXBUF_SIZE = 0x001E # Socket 0 RX buffer size
_W5500_S0_TXBUF_SIZE = 0x001F # Socket 0 TX buffer size
_W5500_S0_TX_FSR = 0x0020 # Socket 0 TX Free Size (2 bytes)
_W5500_S0_TX_WR = 0x0024 # Socket 0 TX Write Pointer (driver reads/writes 2 bytes, high byte first)
_W5500_S0_RX_RSR = 0x0026 # Socket 0 RX Received Size (2 bytes)
_W5500_S0_RX_RD = 0x0028 # Socket 0 RX Read Pointer (driver reads/writes 2 bytes, high byte first)
_W5500_S0_IMR = 0x002C # Socket 0 Interrupt Mask
# Control byte = (BSB << 3) | (RWB << 2) | OM.
# RWB: 1=write 0=read. OM=00 → Variable Data Mode (CS frames the length).
# BSB: 0=common, 1=socket0 reg, 2=socket0 TX buffer, 3=socket0 RX buffer.
# Only the five combinations the driver issues are defined; OM is left at 0.
_CTRL_WR_COMMON = (0 << 3) | (1 << 2) # 0x04
_CTRL_WR_S0REG = (1 << 3) | (1 << 2) # 0x0C
_CTRL_RD_S0REG = (1 << 3) | (0 << 2) # 0x08
_CTRL_WR_S0TX = (2 << 3) | (1 << 2) # 0x14
_CTRL_RD_S0RX = (3 << 3) | (0 << 2) # 0x18
class W5500SPIMaster(Elaboratable):
    """W5500 SPI master in the sync clock domain.

    Three nested FSMs are built in elaborate(): a byte engine that shifts one
    byte over SPI, a transaction engine that frames header + payload inside
    one CS-low window, and a main FSM that sequences init, MACRAW transmit
    and MACRAW receive out of register/buffer transactions.

    Physical SPI pins
    -----------------
    spi_clk / spi_mosi / spi_miso / spi_cs_n : to W5500
    w5500_int_n : W5500 INT_N input (active low)
    w5500_rst_n : W5500 hardware reset (active low)

    Init interface (from BBARegisterFile / BBATop)
    ----------------------------------------------
    init_req : pulse to trigger the W5500 init sequence
    init_done : pulse when init sequence completes
    par : 6-byte MAC address (sampled at init_req)

    TX streaming interface (from TXFrameDrain, sync domain)
    -------------------------------------------------------
    tx_data / tx_valid / tx_ready : byte stream
    tx_sof / tx_eof : frame delimiters on the same cycle as tx_valid

    RX streaming interface (to RXFrameAssembler, sync domain)
    ----------------------------------------------------------
    rx_data / rx_valid / rx_ready : byte stream
    rx_sof / rx_eof : frame delimiters
    """

    def __init__(self, clk_div=1, reset_cycles=24000):
        """Declare the port signals and store the two build-time parameters.

        clk_div      : SCK divider, SCK = sync / (2 * clk_div); any value
                       <= 1 selects full rate.
        reset_cycles : number of sync cycles spent in MR_WAIT after the MR
                       software-reset write.
        """
        # MR-reset settle wait (in sync cycles). ~1 ms; the testbench
        # overrides with a small value for fast simulation.
        self._reset_cycles = reset_cycles
        # SPI SCK = sync_clock / (2 * clk_div). clk_div=1 → full rate (SCK =
        # sync/2): at the 24 MHz slow domain that is 12 MHz SCK (~12 Mbit/s),
        # which comfortably exceeds real-world GC BBA TCP throughput. The W5500
        # tolerates up to 80 MHz SCK, so the divider exists only as a safety
        # knob for board-level signal-integrity issues, not a functional need.
        self._clk_div = clk_div
        # Physical SPI
        self.spi_clk = Signal()
        self.spi_mosi = Signal()
        self.spi_miso = Signal()
        self.spi_cs_n = Signal(init=1)
        self.w5500_int_n = Signal(init=1)
        # Declared only: elaborate() never assigns w5500_rst_n, so it stays at
        # its init value of 1 unless something outside this class drives it.
        self.w5500_rst_n = Signal(init=1)
        # Init control
        self.init_req = Signal()
        self.init_done = Signal()
        self.par = Signal(48) # MAC address (PAR0..5 packed)
        # TX stream
        self.tx_data = Signal(8)
        self.tx_valid = Signal()
        self.tx_ready = Signal()
        self.tx_sof = Signal()
        self.tx_eof = Signal()
        # RX stream
        self.rx_data = Signal(8)
        self.rx_valid = Signal()
        self.rx_ready = Signal()
        self.rx_sof = Signal()
        self.rx_eof = Signal()

    def elaborate(self, platform):
        """Build and return the Module holding the byte engine, the
        transaction engine and the main control FSM (all in `sync`)."""
        m = Module()

        # ── SPI clock enable ─────────────────────────────────────────────
        # clk_en high every `clk_div` sync cycles. The bit engine toggles SCK
        # on each enabled cycle, so SCK = sync / (2 * clk_div).
        clk_en = Signal()
        if self._clk_div <= 1:
            m.d.comb += clk_en.eq(1) # full rate: SCK = sync/2
        else:
            # Free-running modulo-clk_div counter; clk_en is its terminal count.
            div_ctr = Signal(range(self._clk_div))
            with m.If(div_ctr == self._clk_div - 1):
                m.d.sync += div_ctr.eq(0)
            with m.Else():
                m.d.sync += div_ctr.eq(div_ctr + 1)
            m.d.comb += clk_en.eq(div_ctr == self._clk_div - 1)

        # ── SPI pin registers (Mode 0: SCK idles LOW) ────────────────────
        sck_r = Signal()
        cs_r = Signal(init=1)
        shift_out = Signal(8)
        shift_in = Signal(8)
        m.d.comb += self.spi_clk .eq(sck_r)
        m.d.comb += self.spi_cs_n.eq(cs_r)
        m.d.comb += self.spi_mosi.eq(shift_out[7]) # MSB first; valid pre-rising

        # ── Byte-transfer engine (Mode 0) ────────────────────────────────
        # On byte_start, shift out byte_tx MSB-first (8 SCK cycles) and capture
        # MISO into byte_rx; pulse byte_done. CS is owned by the xfer engine.
        byte_start = Signal()
        byte_tx = Signal(8)
        byte_rx = Signal(8)
        byte_done = Signal()
        bit_ctr = Signal(4)
        # Default; the FSM's later byte_done.eq(1) wins in the cycle it fires,
        # which makes byte_done a one-cycle pulse.
        m.d.sync += byte_done.eq(0)
        with m.FSM(domain="sync", name="byte_fsm"):
            with m.State("IDLE"):
                m.d.sync += sck_r.eq(0)
                with m.If(byte_start):
                    m.d.sync += shift_out.eq(byte_tx)
                    m.d.sync += bit_ctr.eq(0)
                    m.next = "RUN"
            with m.State("RUN"):
                with m.If(clk_en):
                    with m.If(~sck_r):
                        # rising edge: slave samples MOSI, master samples MISO
                        m.d.sync += sck_r.eq(1)
                        # shift left, new MISO bit enters at bit 0
                        m.d.sync += shift_in.eq(Cat(self.spi_miso, shift_in[:-1]))
                    with m.Else():
                        # falling edge: advance / finish
                        m.d.sync += sck_r.eq(0)
                        with m.If(bit_ctr == 7):
                            m.d.sync += byte_rx.eq(shift_in)
                            m.d.sync += byte_done.eq(1)
                            m.next = "IDLE"
                        with m.Else():
                            # shift left so the next bit appears on MOSI
                            m.d.sync += shift_out.eq(Cat(0, shift_out[:-1]))
                            m.d.sync += bit_ctr.eq(bit_ctr + 1)

        # ── Generic register transaction engine (Variable Data Mode) ─────
        # One CS-low frame: 3 header bytes (addr_hi, addr_lo, ctrl) then
        # xfer_len payload bytes. Writes source payload from wbuf; reads
        # capture MISO into rbuf.
        WBUF = 8
        xfer_start = Signal()
        xfer_addr = Signal(16)
        xfer_ctrl = Signal(8)
        xfer_len = Signal(range(WBUF + 1))
        xfer_done = Signal()
        wbuf = Array([Signal(8, name=f"wbuf{i}") for i in range(WBUF)])
        rbuf = Array([Signal(8, name=f"rbuf{i}") for i in range(WBUF)])
        # Byte position inside the frame: 0..2 = header, 3.. = payload.
        xfer_idx = Signal(range(WBUF + 3))
        # Stream-write mode: after the 3-byte header, payload bytes are pulled
        # from (s_data, s_valid, s_last) instead of wbuf, until s_last. Used to
        # forward a frame straight into the W5500 TX buffer. s_consume pulses
        # as each streamed byte is accepted; s_count tracks the byte count.
        xfer_stream = Signal()
        s_data = Signal(8)
        s_valid = Signal()
        s_last = Signal()
        s_consume = Signal()
        s_count = Signal(16)
        s_last_r = Signal() # latched s_last for the in-flight byte
        # Stream-read mode: after the header, read `xfer_rcount` payload bytes
        # (sending 0x00 dummies) and push each out via (r_data, r_valid,
        # r_first, r_last) with r_ready back-pressure. Used to pull a frame
        # out of the W5500 RX buffer into RXFrameAssembler.
        xfer_sread = Signal()
        xfer_rcount = Signal(16)
        r_data = Signal(8)
        r_valid = Signal()
        r_first = Signal()
        r_last = Signal()
        r_ready = Signal()
        r_idx = Signal(16)

        # x_byte: the byte to send at position xfer_idx of a non-streamed frame.
        x_byte = Signal(8)
        with m.If(xfer_idx == 0):
            m.d.comb += x_byte.eq(xfer_addr[8:16])
        with m.Elif(xfer_idx == 1):
            m.d.comb += x_byte.eq(xfer_addr[0:8])
        with m.Elif(xfer_idx == 2):
            m.d.comb += x_byte.eq(xfer_ctrl)
        with m.Else():
            m.d.comb += x_byte.eq(wbuf[xfer_idx - 3])

        # Defaults for everything the xfer FSM drives; the per-state
        # assignments inside the FSM come later and therefore override these.
        m.d.comb += byte_start.eq(0)
        m.d.comb += byte_tx.eq(0)
        m.d.comb += s_consume.eq(0)
        m.d.comb += r_valid.eq(0)
        m.d.comb += r_data.eq(0)
        m.d.comb += r_first.eq(0)
        m.d.comb += r_last.eq(0)
        m.d.sync += xfer_done.eq(0)
        with m.FSM(domain="sync", name="xfer_fsm"):
            with m.State("IDLE"):
                with m.If(xfer_start):
                    m.d.sync += cs_r.eq(0) # assert CS for the frame
                    m.d.sync += xfer_idx.eq(0)
                    m.d.sync += s_count.eq(0)
                    m.d.sync += r_idx.eq(0)
                    m.next = "LOAD"
            with m.State("LOAD"):
                m.d.comb += byte_tx.eq(x_byte)
                m.d.comb += byte_start.eq(1)
                m.next = "WAIT"
            with m.State("WAIT"):
                with m.If(byte_done):
                    # Every payload byte's MISO data is kept, for reads and
                    # writes alike; callers only look at rbuf after reads.
                    with m.If(xfer_idx >= 3):
                        m.d.sync += rbuf[xfer_idx - 3].eq(byte_rx)
                    # Header complete (idx 2 = control byte): branch into a
                    # streaming sub-loop, finish, or move to the next byte.
                    with m.If((xfer_idx == 2) & xfer_stream):
                        m.next = "SLOAD" # stream the payload (write)
                    with m.Elif((xfer_idx == 2) & xfer_sread):
                        m.next = "RLOAD" # stream the payload (read)
                    with m.Elif(~xfer_stream & ~xfer_sread
                                & (xfer_idx == (xfer_len + 2))):
                        m.next = "FINISH" # last index = 3 header + len - 1
                    with m.Else():
                        m.d.sync += xfer_idx.eq(xfer_idx + 1)
                        m.next = "LOAD"
            # ── Streamed-payload sub-loop (TX buffer write) ──────────────
            with m.State("SLOAD"):
                # Stalls here (CS stays low) until the source offers a byte.
                with m.If(s_valid):
                    m.d.comb += byte_tx.eq(s_data)
                    m.d.comb += byte_start.eq(1)
                    m.d.sync += s_last_r.eq(s_last)
                    m.next = "SWAIT"
            with m.State("SWAIT"):
                with m.If(byte_done):
                    # s_consume fires only once the byte is fully shifted out.
                    m.d.comb += s_consume.eq(1) # accept this frame byte
                    m.d.sync += s_count.eq(s_count + 1)
                    with m.If(s_last_r):
                        m.next = "FINISH"
                    with m.Else():
                        m.next = "SLOAD"
            # ── Streamed-payload sub-loop (RX buffer read) ───────────────
            with m.State("RLOAD"):
                with m.If(r_idx == xfer_rcount):
                    m.next = "FINISH"
                with m.Else():
                    m.d.comb += byte_tx.eq(0) # dummy MOSI during read
                    m.d.comb += byte_start.eq(1)
                    m.next = "RWAIT"
            with m.State("RWAIT"):
                with m.If(byte_done):
                    m.next = "RPUSH"
            with m.State("RPUSH"):
                # Hold the byte on the output until the sink takes it.
                m.d.comb += r_data .eq(byte_rx)
                m.d.comb += r_valid.eq(1)
                m.d.comb += r_first.eq(r_idx == 0)
                m.d.comb += r_last .eq(r_idx == (xfer_rcount - 1))
                with m.If(r_ready):
                    m.d.sync += r_idx.eq(r_idx + 1)
                    m.next = "RLOAD"
            with m.State("FINISH"):
                m.d.sync += cs_r.eq(1) # deassert CS
                m.d.sync += xfer_done.eq(1)
                m.next = "IDLE"

        # Saved MAC for SHAR programming; current W5500 TX write pointer.
        mac_shadow = Array([Signal(8, name=f"mac{i}") for i in range(6)])
        wait_ctr = Signal(range(self._reset_cycles + 2))
        tx_wr = Signal(16)
        rx_rsr = Signal(16) # RX received size
        rx_rd = Signal(16) # RX read pointer
        pkt_len = Signal(16) # MACRAW packet length (incl. 2-byte header)

        # Frame stream from TXFrameDrain feeds the xfer engine's stream port.
        # tx_ready pulses (= s_consume) as each frame byte is taken into the
        # TX-buffer write transaction.
        m.d.comb += [
            s_data .eq(self.tx_data),
            s_valid.eq(self.tx_valid),
            s_last .eq(self.tx_eof),
            self.tx_ready.eq(s_consume),
        ]
        # RX buffer read stream → RXFrameAssembler.
        m.d.comb += [
            self.rx_data .eq(r_data),
            self.rx_valid.eq(r_valid),
            self.rx_sof .eq(r_first),
            self.rx_eof .eq(r_last),
            r_ready .eq(self.rx_ready),
        ]

        # Helper: a setup state that programs one register-write transaction
        # then waits for it to complete and jumps to `nxt`.
        # It emits two states, `name` and `name + "_W"`, and must be called
        # inside the main FSM's `with` block. `payload` is a Python list of
        # constant bytes copied into wbuf[0..len-1].
        def write_reg(name, addr, ctrl, payload, nxt):
            with m.State(name):
                m.d.sync += xfer_addr.eq(addr)
                m.d.sync += xfer_ctrl.eq(ctrl)
                m.d.sync += xfer_len.eq(len(payload))
                m.d.sync += xfer_stream.eq(0)
                m.d.sync += xfer_sread.eq(0)
                for i, b in enumerate(payload):
                    m.d.sync += wbuf[i].eq(b)
                m.d.sync += xfer_start.eq(1)
                m.next = name + "_W"
            with m.State(name + "_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    m.next = nxt

        # ── Main control FSM ─────────────────────────────────────────────
        with m.FSM(domain="sync", name="main_fsm"):
            with m.State("IDLE"):
                m.d.sync += self.init_done.eq(0)
                # Priority: init request, then pending interrupt, then TX.
                with m.If(self.init_req):
                    for i in range(6):
                        m.d.sync += mac_shadow[i].eq(self.par[i*8:(i+1)*8])
                    m.next = "MR_RST"
                with m.Elif(~self.w5500_int_n):
                    m.next = "RX_CHECK"
                with m.Elif(self.tx_valid & self.tx_sof):
                    m.next = "TX_START"

            # Step 1: MR = 0x80 (software reset), then settle ~1 ms.
            write_reg("MR_RST", _W5500_MR, _CTRL_WR_COMMON, [0x80], "MR_WAIT")
            with m.State("MR_WAIT"):
                with m.If(wait_ctr == self._reset_cycles):
                    m.d.sync += wait_ctr.eq(0)
                    m.next = "SHAR"
                with m.Else():
                    m.d.sync += wait_ctr.eq(wait_ctr + 1)

            # Step 2: SHAR = source MAC (6 bytes from PAR0-5).
            # xfer_stream / xfer_sread are not touched here; they are still 0
            # from the preceding write_reg("MR_RST", ...) setup.
            with m.State("SHAR"):
                m.d.sync += xfer_addr.eq(_W5500_SHAR)
                m.d.sync += xfer_ctrl.eq(_CTRL_WR_COMMON)
                m.d.sync += xfer_len.eq(6)
                for i in range(6):
                    m.d.sync += wbuf[i].eq(mac_shadow[i])
                m.d.sync += xfer_start.eq(1)
                m.next = "SHAR_W"
            with m.State("SHAR_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    m.next = "S0_MR"

            # Steps 3-5: S0_MR=MACRAW (0x04), S0_CR=OPEN (0x01), S0_IMR=0x05.
            # NOTE(review): going by the W5500 datasheet's Sn_IMR bit layout,
            # 0x05 looks like CON|RECV (SEND_OK would be bit 4, 0x10), not
            # RECV|SEND_OK — confirm the intended mask.
            # NOTE(review): the datasheet also appears to gate socket
            # interrupts onto INTn through SIMR (common block), which this
            # sequence never writes — confirm INT_N can assert on hardware.
            write_reg("S0_MR", _W5500_S0_MR, _CTRL_WR_S0REG, [0x04], "S0_CR")
            write_reg("S0_CR", _W5500_S0_CR, _CTRL_WR_S0REG, [0x01], "S0_IMR")
            write_reg("S0_IMR", _W5500_S0_IMR, _CTRL_WR_S0REG, [0x05], "INIT_DONE")
            with m.State("INIT_DONE"):
                # One-cycle pulse: IDLE clears init_done again.
                m.d.sync += self.init_done.eq(1)
                m.next = "IDLE"

            # ── TX path (MACRAW) ─────────────────────────────────────────
            # 1) read S0_TX_WR, 2) stream the frame into the TX buffer at that
            # offset, 3) advance S0_TX_WR by the byte count, 4) issue SEND.
            # S0_TX_FSR is never read, so free space is not checked first.
            with m.State("TX_START"):
                m.d.sync += xfer_addr.eq(_W5500_S0_TX_WR)
                m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0REG)
                m.d.sync += xfer_len.eq(2)
                m.d.sync += xfer_stream.eq(0)
                m.d.sync += wbuf[0].eq(0) # read → send 0x00 dummies
                m.d.sync += wbuf[1].eq(0)
                m.d.sync += xfer_start.eq(1)
                m.next = "TX_RDPTR_W"
            with m.State("TX_RDPTR_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    # rbuf[0] is the first byte received = high byte
                    m.d.sync += tx_wr.eq(Cat(rbuf[1], rbuf[0])) # big-endian
                    m.next = "TX_DATA"
            with m.State("TX_DATA"):
                # The raw 16-bit pointer is used as the buffer address; no
                # masking or wrap handling is done on this side.
                m.d.sync += xfer_addr.eq(tx_wr)
                m.d.sync += xfer_ctrl.eq(_CTRL_WR_S0TX) # socket-0 TX buffer
                m.d.sync += xfer_stream.eq(1)
                m.d.sync += xfer_start.eq(1)
                m.next = "TX_DATA_W"
            with m.State("TX_DATA_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    m.d.sync += xfer_stream.eq(0)
                    m.d.sync += tx_wr.eq(tx_wr + s_count) # advanced pointer
                    m.next = "TX_UPDPTR"
            with m.State("TX_UPDPTR"):
                m.d.sync += xfer_addr.eq(_W5500_S0_TX_WR)
                m.d.sync += xfer_ctrl.eq(_CTRL_WR_S0REG)
                m.d.sync += xfer_len.eq(2)
                m.d.sync += xfer_stream.eq(0)
                m.d.sync += wbuf[0].eq(tx_wr[8:16]) # hi (already advanced)
                m.d.sync += wbuf[1].eq(tx_wr[0:8]) # lo
                m.d.sync += xfer_start.eq(1)
                m.next = "TX_UPDPTR_W"
            with m.State("TX_UPDPTR_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    m.next = "TX_SEND"
            # S0_CR = SEND (0x20)
            write_reg("TX_SEND", _W5500_S0_CR, _CTRL_WR_S0REG, [0x20], "IDLE")

            # ── RX path (MACRAW) ─────────────────────────────────────────
            # Triggered by W5500 INT (w5500_int_n low): read RX_RSR, read
            # RX_RD, read the 2-byte MACRAW length, stream the frame out,
            # advance RX_RD, issue RECV.
            with m.State("RX_CHECK"): # read S0_RX_RSR
                m.d.sync += xfer_addr.eq(_W5500_S0_RX_RSR)
                m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0REG)
                m.d.sync += xfer_len.eq(2)
                m.d.sync += xfer_stream.eq(0)
                m.d.sync += xfer_sread.eq(0)
                m.d.sync += wbuf[0].eq(0)
                m.d.sync += wbuf[1].eq(0)
                m.d.sync += xfer_start.eq(1)
                m.next = "RX_RSR_W"
            with m.State("RX_RSR_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    m.d.sync += rx_rsr.eq(Cat(rbuf[1], rbuf[0]))
                    m.next = "RX_RSR_CHK"
            with m.State("RX_RSR_CHK"):
                # rx_rsr is only tested for zero; its value is not used again.
                # This early exit does not write S0_IR, so no interrupt flag
                # is cleared on this path.
                with m.If(rx_rsr == 0):
                    m.next = "IDLE" # nothing received
                with m.Else():
                    m.next = "RX_RDPTR"
            # RX_RDPTR and RX_LEN reuse xfer_stream=0, xfer_sread=0 and the
            # zeroed wbuf[0..1] that RX_CHECK set up.
            with m.State("RX_RDPTR"): # read S0_RX_RD
                m.d.sync += xfer_addr.eq(_W5500_S0_RX_RD)
                m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0REG)
                m.d.sync += xfer_len.eq(2)
                m.d.sync += xfer_start.eq(1)
                m.next = "RX_RDPTR_W"
            with m.State("RX_RDPTR_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    m.d.sync += rx_rd.eq(Cat(rbuf[1], rbuf[0]))
                    m.next = "RX_LEN"
            with m.State("RX_LEN"): # read 2-byte MACRAW length
                m.d.sync += xfer_addr.eq(rx_rd)
                m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0RX)
                m.d.sync += xfer_len.eq(2)
                m.d.sync += xfer_start.eq(1)
                m.next = "RX_LEN_W"
            with m.State("RX_LEN_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    m.d.sync += pkt_len.eq(Cat(rbuf[1], rbuf[0]))
                    m.next = "RX_FRAME"
            with m.State("RX_FRAME"): # stream pkt_len - 2 frame bytes
                # There is no guard for pkt_len < 2: the subtraction then
                # wraps in the 16-bit xfer_rcount (0xFFFE / 0xFFFF).
                m.d.sync += xfer_addr.eq(rx_rd + 2)
                m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0RX)
                m.d.sync += xfer_sread.eq(1)
                m.d.sync += xfer_rcount.eq(pkt_len - 2)
                m.d.sync += xfer_start.eq(1)
                m.next = "RX_FRAME_W"
            with m.State("RX_FRAME_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    m.d.sync += xfer_sread.eq(0)
                    m.next = "RX_UPDRD"
            with m.State("RX_UPDRD"): # S0_RX_RD += pkt_len
                m.d.sync += xfer_addr.eq(_W5500_S0_RX_RD)
                m.d.sync += xfer_ctrl.eq(_CTRL_WR_S0REG)
                m.d.sync += xfer_len.eq(2)
                m.d.sync += xfer_stream.eq(0)
                m.d.sync += xfer_sread.eq(0)
                m.d.sync += wbuf[0].eq((rx_rd + pkt_len)[8:16])
                m.d.sync += wbuf[1].eq((rx_rd + pkt_len)[0:8])
                m.d.sync += xfer_start.eq(1)
                m.next = "RX_UPDRD_W"
            with m.State("RX_UPDRD_W"):
                m.d.sync += xfer_start.eq(0)
                with m.If(xfer_done):
                    m.next = "RX_RECV"
            # S0_CR = RECV (0x40), then clear the RECV interrupt so INT_N
            # deasserts (write 1 to Sn_IR[2]); otherwise the FSM would re-enter
            # RX_CHECK forever on a real W5500.
            write_reg("RX_RECV", _W5500_S0_CR, _CTRL_WR_S0REG, [0x40], "RX_CLR_IR")
            write_reg("RX_CLR_IR", _W5500_S0_IR, _CTRL_WR_S0REG, [0x04], "IDLE")

        return m
# ── Testbench ─────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Self-checking simulation of W5500SPIMaster against a behavioural W5500
    # SPI slave model.  T1 checks the idle bus levels, T2/T3 the init register
    # writes, T4 a MACRAW transmit and T5 a MACRAW receive.  Every failed check
    # is appended to `errors`; the process exits with status 1 if any exist.
    import sys
    from amaranth.sim import Simulator, Period

    # Short reset wait so the init sequence runs quickly in simulation.
    dut = W5500SPIMaster(reset_cycles=10)
    errors = []

    # MAC for SHAR: par[i*8:(i+1)*8] = mac byte i → mac = 11 22 33 44 55 66
    MAC = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66]
    PAR = sum(b << (8 * i) for i, b in enumerate(MAC))

    # Expected W5500 init transactions: [addr_hi, addr_lo, ctrl, *payload].
    # ctrl 0x04 = common-block write (VDM); 0x0C = socket-0-reg write (VDM).
    EXPECTED = [
        [0x00, 0x00, 0x04, 0x80],  # MR = 0x80 (reset)
        [0x00, 0x09, 0x04, *MAC],  # SHAR = MAC
        [0x00, 0x00, 0x0C, 0x04],  # S0_MR = MACRAW
        [0x00, 0x01, 0x0C, 0x01],  # S0_CR = OPEN
        [0x00, 0x2C, 0x0C, 0x05],  # S0_IMR = RECV|SEND_OK
    ]
    txns = []  # transactions captured by the W5500 slave model

    # RX frame the W5500 will hand back, and the MACRAW length it reports.
    RX_FRAME = [0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02]
    RX_PKT_LEN = len(RX_FRAME) + 2  # MACRAW length includes the header

    def hex_list(values):
        """Return the byte values formatted as a list of '0xNN' strings."""
        return [f"0x{b:02X}" for b in values]

    def check_txns(tag, kind, expected):
        """Print the captured transactions and record an error on mismatch.

        tag      -- test label used in the messages, e.g. "T3"
        kind     -- transaction family named in the messages, e.g. "init"
        expected -- list of byte lists that `txns` must equal exactly
        """
        print(f"{tag} captured {len(txns)} {kind} transactions:")
        for t in txns:
            print(" ", hex_list(t))
        if txns != expected:
            errors.append(f"{tag} {kind} sequence mismatch:\n got {txns}\n want {expected}")

    async def wait_for_txns(ctx, tag, count, limit):
        """Tick until at least `count` transactions are captured.

        Always advances at least one clock.  Returns True on success; after
        `limit` clocks without success it records a timeout error and returns
        False so the caller's sequence comparison is not the only diagnostic.
        """
        for _ in range(limit):
            await ctx.tick("sync").repeat(1)
            if len(txns) >= count:
                return True
        errors.append(
            f"{tag} timed out after {limit} cycles waiting for {count} "
            f"transactions (captured {len(txns)})")
        return False

    def build_response(bsb, addr):
        """Bytes the W5500 drives on MISO for a read of (bsb, addr)."""
        if bsb == 1 and addr == _W5500_S0_RX_RSR:
            return [(RX_PKT_LEN >> 8) & 0xFF, RX_PKT_LEN & 0xFF]
        if bsb == 1 and addr == _W5500_S0_RX_RD:
            return [0x00, 0x00]  # RX read pointer = 0
        if bsb == 3 and addr == 0x0000:
            return [(RX_PKT_LEN >> 8) & 0xFF, RX_PKT_LEN & 0xFF]  # length
        if bsb == 3 and addr == 0x0002:
            return list(RX_FRAME)  # frame payload
        # Any other read is answered with zeros.
        return [0x00] * 64

    async def w5500_model(ctx):
        """W5500 SPI slave model: captures CS-framed transactions (MOSI) and,
        for reads, drives MISO with canned register/buffer data. Mode 0:
        MOSI sampled on rising SCK, MISO shifted out MSB-first.
        """
        prev_cs, prev_sck = 1, 0
        rx_byte = rx_bits = nbytes = 0
        hdr = [0, 0, 0]  # addr_hi, addr_lo, ctrl of the current frame
        is_read = False
        resp, ridx = [], 0  # canned read data and index of the next byte
        msr = msr_bits = 0  # MISO shift register and bits left in it
        cur_txn = []
        async for vals in ctx.tick("sync").sample(
                dut.spi_cs_n, dut.spi_clk, dut.spi_mosi):
            # Only the trailing three entries are the signals sampled above.
            cs, sck, mosi = vals[-3:]
            rising = (prev_sck == 0 and sck == 1)
            if prev_cs == 1 and cs == 0:  # CS falling: start frame
                cur_txn = []
                rx_byte = rx_bits = nbytes = 0
                is_read = False
                resp, ridx, msr, msr_bits = [], 0, 0, 0
            if cs == 0 and rising:
                # MISO bit just sampled by the master → advance shift register
                if is_read and nbytes >= 3:
                    msr = (msr << 1) & 0xFF
                    msr_bits -= 1
                    if msr_bits == 0:
                        msr = resp[ridx] if ridx < len(resp) else 0
                        ridx += 1
                        msr_bits = 8
                # sample MOSI
                rx_byte = ((rx_byte << 1) | mosi) & 0xFF
                rx_bits += 1
                if rx_bits == 8:
                    cur_txn.append(rx_byte)
                    if nbytes < 3:
                        hdr[nbytes] = rx_byte
                    if nbytes == 2:  # header complete → decode
                        ctrl = hdr[2]
                        is_read = (ctrl & 0x04) == 0
                        bsb = ctrl >> 3
                        addr = (hdr[0] << 8) | hdr[1]
                        if is_read:
                            resp = build_response(bsb, addr)
                            # Tolerate an empty canned response instead of
                            # raising IndexError inside the simulator.
                            msr = resp[0] if resp else 0
                            ridx, msr_bits = 1, 8
                    nbytes += 1
                    rx_byte = rx_bits = 0
            if prev_cs == 0 and cs == 1:  # CS rising: end frame
                txns.append(list(cur_txn))
            ctx.set(dut.spi_miso, (msr >> 7) & 1)
            prev_cs, prev_sck = cs, sck

    rx_collected = []

    async def rx_collector(ctx):
        """Append rx_data to `rx_collected` on every clock where both
        rx_valid and rx_ready are sampled high."""
        async for vals in ctx.tick("sync").sample(
                dut.rx_valid, dut.rx_ready, dut.rx_data):
            valid, ready, data = vals[-3:]
            if valid and ready:
                rx_collected.append(data)

    async def testbench(ctx):
        """Drive the DUT through T1..T5 and record failures in `errors`."""
        ctx.set(dut.par, PAR)
        await ctx.tick("sync").repeat(4)

        # T1: SPI idle — CLK low (Mode 0), CS high
        if ctx.get(dut.spi_clk) != 0:
            errors.append("T1 CLK idle != 0")
        if ctx.get(dut.spi_cs_n) != 1:
            errors.append("T1 CS idle != 1")
        print(f"T1 idle: CLK={ctx.get(dut.spi_clk)} CS={ctx.get(dut.spi_cs_n)}")

        # T2: run the init sequence
        ctx.set(dut.init_req, 1)
        await ctx.tick("sync").repeat(1)
        ctx.set(dut.init_req, 0)
        for _ in range(4000):
            await ctx.tick("sync").repeat(1)
            if ctx.get(dut.init_done):
                break
        if not ctx.get(dut.init_done):
            errors.append("T2 init_done never asserted")
        await ctx.tick("sync").repeat(4)
        print(f"T2 init_done: {ctx.get(dut.init_done)}")

        # T3: verify the captured init transaction sequence
        check_txns("T3", "init", EXPECTED)

        # ── T4: TX a frame (MACRAW) ──────────────────────────────────────
        txns.clear()
        FRAME = [0xAA, 0xBB, 0xCC, 0xDD]
        # The slave model answers this read with zeros, so S0_TX_WR = 0x0000.
        TX_EXPECTED = [
            [0x00, 0x24, 0x08, 0x00, 0x00],        # read S0_TX_WR (dummies)
            [0x00, 0x00, 0x14, *FRAME],            # write TX buffer @ 0x0000
            [0x00, 0x24, 0x0C, 0x00, len(FRAME)],  # S0_TX_WR += len
            [0x00, 0x01, 0x0C, 0x20],              # S0_CR = SEND
        ]

        async def send_frame(frame):
            """Present `frame` byte-by-byte on the TX stream.

            Each byte is held with tx_valid high until tx_ready is seen, then
            one more clock completes the transfer.  If tx_ready is still low
            after 2000 clocks an error is recorded and the rest of the frame
            is abandoned, rather than pretending the byte was accepted.
            """
            last = len(frame) - 1
            for i, b in enumerate(frame):
                ctx.set(dut.tx_data, b)
                ctx.set(dut.tx_valid, 1)
                ctx.set(dut.tx_sof, 1 if i == 0 else 0)
                ctx.set(dut.tx_eof, 1 if i == last else 0)
                waited = 0
                while waited < 2000 and not ctx.get(dut.tx_ready):
                    await ctx.tick("sync").repeat(1)
                    waited += 1
                if not ctx.get(dut.tx_ready):
                    errors.append(
                        f"T4 tx_ready never asserted for byte {i} "
                        f"(waited {waited} cycles)")
                    break
                await ctx.tick("sync").repeat(1)  # complete the consume
            ctx.set(dut.tx_valid, 0)
            ctx.set(dut.tx_sof, 0)
            ctx.set(dut.tx_eof, 0)

        await send_frame(FRAME)
        # let the pointer-update + SEND transactions finish
        await wait_for_txns(ctx, "T4", len(TX_EXPECTED), 2000)
        await ctx.tick("sync").repeat(4)
        check_txns("T4", "TX", TX_EXPECTED)

        # ── T5: RX a frame (MACRAW) ──────────────────────────────────────
        # The model returns RSR=pkt_len, RD=0, MACRAW length=pkt_len, then the
        # frame. Expected transactions (read dummies are 0x00):
        RX_EXPECTED = [
            [0x00, 0x26, 0x08, 0x00, 0x00],                 # read S0_RX_RSR
            [0x00, 0x28, 0x08, 0x00, 0x00],                 # read S0_RX_RD
            [0x00, 0x00, 0x18, 0x00, 0x00],                 # read MACRAW length
            [0x00, 0x02, 0x18, *([0x00] * len(RX_FRAME))],  # read frame
            [0x00, 0x28, 0x0C, 0x00, RX_PKT_LEN],           # S0_RX_RD += pkt_len
            [0x00, 0x01, 0x0C, 0x40],                       # S0_CR = RECV
            [0x00, 0x02, 0x0C, 0x04],                       # S0_IR clear RECV
        ]
        txns.clear()
        ctx.set(dut.rx_ready, 1)
        ctx.set(dut.w5500_int_n, 0)  # signal a received packet
        await wait_for_txns(ctx, "T5", len(RX_EXPECTED), 4000)
        ctx.set(dut.w5500_int_n, 1)
        await ctx.tick("sync").repeat(8)
        check_txns("T5", "RX", RX_EXPECTED)
        print(f"T5 rx frame: {hex_list(rx_collected)} "
              f"(want {hex_list(RX_FRAME)})")
        if rx_collected != RX_FRAME:
            errors.append(f"T5 RX frame mismatch: got {rx_collected}, want {RX_FRAME}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    sim.add_process(w5500_model)
    sim.add_process(rx_collector)
    with sim.write_vcd("W5500SPIMaster.vcd"):
        sim.run()

    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
    else:
        print("\nAll tests passed.")
+5 -5
View File
@@ -1,10 +1,10 @@
amaranth @ git+https://github.com/amaranth-lang/amaranth@main
amaranth-boards @ git+https://github.com/amaranth-lang/amaranth-boards.git@7e24efe2f6e95afddd0c1b56f1a9423c48caa472
amaranth-yosys==0.50.0.0.post115
importlib_resources==6.5.2
amaranth-boards @ git+https://github.com/amaranth-lang/amaranth-boards.git@8bc91db6f68c5c36f30926bf56836739c138986f
amaranth-yosys==0.50.0.0.post124
importlib_resources==7.1.0
Jinja2==3.1.6
jschon==0.11.1
MarkupSafe==3.0.2
MarkupSafe==3.0.3
pyvcd==0.4.1
rfc3986==2.0.0
wasmtime==36.0.0
wasmtime==45.0.0