Added full design created with Claude
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
# Base image: Python 3.12 on Debian bookworm (slim variant).
FROM python:3.12-slim-bookworm
|
||||
|
||||
# System packages: git, the iCE40 tools (yosys, nextpnr-ice40, fpga-icestorm)
# and Node.js/npm, which the "npm install" step below needs. The apt package
# lists are deleted in the same RUN so they do not persist in the image layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
git \
|
||||
yosys \
|
||||
nextpnr-ice40 \
|
||||
fpga-icestorm \
|
||||
nodejs npm \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install the @anthropic-ai/claude-code npm package globally.
RUN npm install -g @anthropic-ai/claude-code
|
||||
|
||||
# All following paths are relative to /workspace.
WORKDIR /workspace
|
||||
|
||||
# Copy only requirements.txt before running pip, so the pip layer is rebuilt
# only when the requirements file changes.
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
@@ -0,0 +1,32 @@
|
||||
#Requires -RunAsAdministrator
<#
.SYNOPSIS
    Shares the IceBreaker FPGA (FTDI FT2232H, VID 0403) and attaches it to WSL2 via usbipd-win.

.DESCRIPTION
    Run this on the Windows host, from an elevated prompt, before opening the devcontainer.
    Steps:
      1. Check that usbipd is installed.
      2. Find exactly one connected USB device whose vendor ID is 0403.
      3. Share it with 'usbipd bind' (usbipd-win 4.x only attaches devices that are shared).
      4. Attach it to WSL2 with 'usbipd attach --wsl'.
    The script stops with an error message as soon as any step fails.
#>

$ErrorActionPreference = 'Stop'

if (-not (Get-Command usbipd -ErrorAction SilentlyContinue)) {
    Write-Error "usbipd not found. Install it from: https://github.com/dorssel/usbipd-win/releases"
    exit 1
}

# A non-zero exit code from a native command does not stop the script by itself,
# so every usbipd call below is followed by an explicit $LASTEXITCODE check.
$listing = usbipd list
if ($LASTEXITCODE -ne 0) {
    Write-Error "'usbipd list' failed with exit code $LASTEXITCODE."
    exit 1
}

# Keep only connected-device rows of the form "<BUSID>  0403:<PID>  <description>  <state>".
# Anchoring on the BUSID and the "VID:PID" column avoids false hits on "0403" appearing
# in a product ID, a device description, or a GUID in the "Persisted" section.
$devices = @($listing | Where-Object { $_ -match '^\s*\d+-\d+(\.\d+)*\s+0403:[0-9A-Fa-f]{4}\s' })

if ($devices.Count -eq 0) {
    Write-Error "No FTDI device (VID 0403) found. Is the IceBreaker plugged in?"
    exit 1
}

if ($devices.Count -gt 1) {
    Write-Host "Multiple FTDI devices found:"
    $devices | ForEach-Object { Write-Host "  $_" }
    Write-Error "Ambiguous. Unplug other FTDI devices or run 'usbipd attach --wsl --busid <BUSID>' manually."
    exit 1
}

# Extract BUSID (first token on the line, e.g. "3-1"); Trim() first so leading
# whitespace cannot produce an empty first token.
$deviceRow = $devices[0]
$busid = ($deviceRow.Trim() -split '\s+')[0]

# The last column of the row is the device state; attaching an already attached
# device is an error in usbipd, so treat that case as success.
if ($deviceRow -match '\bAttached\s*$') {
    Write-Host "IceBreaker at bus ID $busid is already attached. Nothing to do."
    exit 0
}

Write-Host "Sharing IceBreaker at bus ID $busid (usbipd bind)..."
usbipd bind --busid $busid
if ($LASTEXITCODE -ne 0) {
    Write-Error "'usbipd bind --busid $busid' failed with exit code $LASTEXITCODE."
    exit 1
}

Write-Host "Attaching IceBreaker at bus ID $busid to WSL2..."
usbipd attach --wsl --busid $busid
if ($LASTEXITCODE -ne 0) {
    Write-Error "'usbipd attach --wsl --busid $busid' failed with exit code $LASTEXITCODE. Is a WSL2 distribution running?"
    exit 1
}

Write-Host "Done. You can now open the devcontainer and use iceprog."
|
||||
@@ -0,0 +1,29 @@
|
||||
{
    "name": "Amaranth HDL - IceBreaker",
    "build": {
        "dockerfile": "Dockerfile",
        "context": ".."
    },
    // USB flashing (iceprog) requires the IceBreaker to be forwarded to WSL2 first.
    // On Windows: install usbipd-win (https://github.com/dorssel/usbipd-win/releases),
    // then run (as Administrator) before opening this devcontainer:
    //   .devcontainer/attach-icebreaker.ps1
    "runArgs": ["--privileged"],
    "workspaceFolder": "/workspace",
    "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
    "mounts": [
        // Bind-mount the host user's .claude directory to /root/.claude in the container.
        // HOME is set on Linux/macOS/WSL hosts and USERPROFILE on Windows; normally only one
        // of the two is defined, so concatenating them yields the home directory on either
        // kind of host. The directory must already exist on the host or the bind mount fails.
        "source=${localEnv:HOME}${localEnv:USERPROFILE}/.claude,target=/root/.claude,type=bind,consistency=cached"
    ],
    "customizations": {
        "vscode": {
            "extensions": [
                "ms-python.python",
                "ms-python.pylance",
                "anthropic.claude-code"
            ],
            "settings": {
                "python.defaultInterpreterPath": "/usr/local/bin/python"
            }
        }
    }
}
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
# Generated FPGA build artifacts (regenerate with: python -m exi_bba.synth)
|
||||
build/
|
||||
|
||||
# Simulation waveforms (regenerate by running the testbenches)
|
||||
*.vcd
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
.venv/
|
||||
venv/
|
||||
|
||||
# Editor / OS cruft
|
||||
.DS_Store
|
||||
*.swp
|
||||
@@ -0,0 +1,493 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Project: GC BBA FPGA Replacement
|
||||
|
||||
Replace the GameCube Broadband Adapter (DOL-015 / MX98730EC) with an iCEbreaker
|
||||
FPGA (Lattice iCE40UP5K) written in Amaranth HDL. The FPGA emulates the BBA
|
||||
register interface over the GameCube EXI bus and bridges to a WIZnet ethernet
|
||||
chip for real 100BASE-TX ethernet — default **W5100** (indirect parallel bus,
|
||||
reaches the EXI throughput ceiling) or **W5500** (SPI Pmod, simpler wiring but
|
||||
~12 Mbit/s). GC software (Swiss homebrew) sees an identical BBA. See "W5100 vs
|
||||
W5500 ethernet back-end".
|
||||
|
||||
---
|
||||
|
||||
## Development Environment
|
||||
|
||||
**Preferred:** Use the devcontainer (`.devcontainer/`) which includes Python 3.12,
|
||||
`nextpnr-ice40`, and `fpga-icestorm` pre-installed.
|
||||
|
||||
**Windows host + WSL2 devcontainer — USB flashing setup:**
|
||||
1. Install `usbipd-win` (https://github.com/dorssel/usbipd-win/releases)
|
||||
2. Run `.devcontainer/attach-icebreaker.ps1` as Administrator before opening the devcontainer
|
||||
3. The devcontainer runs `--privileged` to pass through the USB device
|
||||
|
||||
**Local venv (outside devcontainer):**
|
||||
```bash
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
Yosys is bundled in `amaranth-yosys`; `nextpnr-ice40` and `iceprog` must be
|
||||
installed separately (via apt on Linux, or via the devcontainer).
|
||||
|
||||
---
|
||||
|
||||
## Commands
|
||||
|
||||
**Build and flash the iCEbreaker (must run from workspace root):**
|
||||
```bash
|
||||
python rebbarb/rebbarb.py
|
||||
```
|
||||
Runs synthesis (yosys), place-and-route (nextpnr-ice40), and flashes via `iceprog`.
|
||||
Set `ICEPROG=/path/to/iceprog` env var to override the binary location.
|
||||
Note: `rebbarb/rebbarb.py` builds a 36 MHz LED blink demo. The BBA
|
||||
implementation (`exi_bba/`) uses a split-domain clock: `capture` @ 54 MHz (PLL)
|
||||
for the SPI bit engine, `exi`/`sync` @ 24 MHz (HFOSC) for everything else.
|
||||
Synthesize/flash the real design with `python -m exi_bba.synth [--flash]`.
|
||||
|
||||
**Run a simulation:**
|
||||
```bash
|
||||
# New-API testbench style (preferred for new code):
|
||||
python rebbarb/toggle_button.py # writes ToggleButton.vcd
|
||||
python rebbarb/pulse_button.py # writes PulseButton.vcd
|
||||
|
||||
# Old-API process style (reference only, do not replicate in new code):
|
||||
python examples/amaranth_cdc.py # CDC primitives demo
|
||||
python examples/async_fifo.py # AsyncFIFO behaviour
|
||||
python examples/icebreaker_fifo.py # iCEbreaker-specific FIFO (Verilog dump)
|
||||
```
|
||||
Open VCD output with `gtkwave`. Simulations are the primary testing mechanism —
|
||||
there is no separate test runner.
|
||||
|
||||
**Verify PLL parameters:**
|
||||
```bash
|
||||
icepll -i 12 -o 54 # confirms DIVR=0 DIVF=71 DIVQ=4 → 54 MHz (capture domain)
|
||||
```
|
||||
(`exi`/`sync` come from the internal SB_HFOSC ÷2 = 24 MHz — no PLL.)
|
||||
|
||||
---
|
||||
|
||||
## Current Implementation State
|
||||
|
||||
The `exi_bba/` module tree is **fully implemented** with simulation testbenches.
|
||||
All modules elaborate without errors and pass their unit tests. The full design
|
||||
**synthesizes, places, routes, and meets timing** on the iCE40UP5K
|
||||
(`python -m exi_bba.synth`): `capture` closes ~70 MHz (target 54) and `exi`/
|
||||
`sync` close ~36 MHz (target 24) — both PASS.
|
||||
|
||||
### `exi_bba/` module status
|
||||
|
||||
| Module | File | Tests pass |
|
||||
|---|---|---|
|
||||
| `BBATop` | `exi_bba/bba_top.py` | ✅ EXI integration + full W5100→SPRAM→GC RX loop; synth PASS |
|
||||
| `ExiCapture` | `exi_bba/exi_capture.py` | ✅ rx/tx byte-stream + over-push/flush |
|
||||
| `SPIMode3Slave` | `exi_bba/spi_mode3_slave.py` | ✅ 4 tests (live-drive TX) |
|
||||
| `BBARegisterFile` | `exi_bba/bba_register_file.py` | ✅ 7 tests (proactive push + DMA stream) |
|
||||
| `SPRAMArbiter` | `exi_bba/spram_arbiter.py` | ✅ 3 tests |
|
||||
| `RXFrameAssembler` | `exi_bba/rx_frame_assembler.py` | ✅ 3 tests |
|
||||
| `TXFrameDrain` | `exi_bba/tx_frame_drain.py` | ✅ 2 tests |
|
||||
| `W5100ParallelMaster` | `exi_bba/w5100_parallel_master.py` | ✅ 5 tests (init/TX/RX vs bus model, incl. ring wrap) — **default eth back-end** |
|
||||
| `W5500SPIMaster` | `exi_bba/w5500_spi_master.py` | ✅ init/TX/RX vs SPI-slave model (alt back-end) |
|
||||
| `StatusPanel` | `exi_bba/status_panel.py` | ✅ 6 tests (heartbeat, stretched activity LEDs, debounced buttons, freeze) |
|
||||
| `EEPROMModel` | `exi_bba/eeprom_model.py` | ✅ 4 tests |
|
||||
|
||||
**Bring-up status panel (optional):** `BBATop(status_panel=True)` adds a
|
||||
`StatusPanel` driving onboard iCEbreaker LEDs + button (dedicated pins, so it
|
||||
coexists with EXI + W5100). `synth.py` enables it: **LEDG=heartbeat**,
|
||||
**LEDR=EXI activity** (the GC is talking), **BTN_N=manual re-init**. The full
|
||||
EXI + W5100 + panel build synthesizes and meets timing (slow ~35≥24, capture
|
||||
~64≥54, 44% LC). Panel LEDs 3–5 (rx/tx/ready) exist in the module but aren't
|
||||
mapped on the iCEbreaker (only 2 discrete LEDs); the onboard RGB or a custom
|
||||
PCB can expose them.
|
||||
|
||||
**Ethernet back-end is selectable:** `BBATop(eth="w5100")` (default — indirect
|
||||
parallel bus, reaches the ~27 Mbit/s EXI ceiling) or `BBATop(eth="w5500")` (SPI,
|
||||
~12 Mbit/s). Both masters expose the identical tx/rx/init/par streaming
|
||||
interface; only the physical pins differ. See "W5100 vs W5500" below.
|
||||
|
||||
### Run all module testbenches (from workspace root)
|
||||
```bash
|
||||
python -m exi_bba.spi_mode3_slave
|
||||
python -m exi_bba.exi_capture
|
||||
python -m exi_bba.bba_register_file
|
||||
python -m exi_bba.spram_arbiter
|
||||
python -m exi_bba.rx_frame_assembler
|
||||
python -m exi_bba.tx_frame_drain
|
||||
python -m exi_bba.w5100_parallel_master # 5 tests: init, TX(+wrap), RX(+wrap)
|
||||
python -m exi_bba.w5500_spi_master
|
||||
python -m exi_bba.status_panel # 6 tests: heartbeat/activity/buttons
|
||||
python -m exi_bba.eeprom_model
|
||||
python -m exi_bba.bba_top # end-to-end EXI integration test (W5100 RX loop)
|
||||
```
|
||||
|
||||
### Pending work
|
||||
- **Synthesis/timing**: ✅ done — `python -m exi_bba.synth` synthesizes, P&Rs,
|
||||
and meets timing on both clock domains (capture ~68≥54, slow ~40≥24).
|
||||
- **W5500 init/TX/RX**: ✅ done — `W5500SPIMaster` has a real Mode-0 byte engine,
|
||||
a generic register-transaction engine (header + wbuf/stream payload), the full
|
||||
init sequence (MR reset, SHAR, S0_MR MACRAW, S0_CR OPEN, S0_IMR), MACRAW TX
|
||||
(read TX_WR → stream frame to TX buffer → advance TX_WR → SEND) and MACRAW RX
|
||||
(RSR → RD → 2-byte length → stream frame out → advance RD → RECV). All verified
|
||||
on the wire by a responding W5500 SPI-slave model in the testbench.
|
||||
- **PAR0–5 → W5500 SHAR**: ✅ done — `reg.par` wired to `w5500.par` in `BBATop`
|
||||
(PAR0 packed in the low byte so it is the first SHAR octet).
|
||||
- **NCRA SR bit**: ✅ done — `BBARegisterFile.ncra_sr` (= NCRA[3]) gates
|
||||
`asm.rx_enabled` in `BBATop` (was hard-wired to 1).
|
||||
- **W5500 SPI throughput**: SCK = sync÷2 = 12 MHz (~12 Mbit/s) — exceeds
|
||||
real-world GC BBA TCP throughput (~6–10 Mbit/s) but is below the 27 Mbit/s raw
|
||||
EXI ceiling. Pushing past 12 Mbit/s was investigated and found NOT achievable
|
||||
on this UP5K (the W5500-operating logic is distributed ~40 MHz, not just the
|
||||
bit-bang) — see the "Full-rate W5500 SPI" item below.
|
||||
`W5500SPIMaster(clk_div=N)` divides SCK further if signal integrity needs it.
|
||||
- **EXI DMA bulk reads**: ✅ done — SPRAM-region reads (addr ≥ 0x100) now STREAM
|
||||
until CS deasserts instead of stopping at the header's 2-bit length, so they
|
||||
serve both ≤4-byte immediate reads (Swiss) AND arbitrary-length DMA reads
|
||||
(other GC software, and a future Swiss path for loading ROMs from a network
|
||||
file store). Implementation:
|
||||
- `SPIMode3Slave.cs_active` (synchronised CS level) → `ExiCapture` crosses it
|
||||
to the exi domain (FFSynchronizer) → `BBARegisterFile.cs_active`.
|
||||
- `BBARegisterFile` SPRAM_STREAM state: auto-increments the SPRAM address,
|
||||
prefetches up to SP_LIMIT=4 reads in flight, pushes responses to tx_fifo;
|
||||
SPRAM_END drains the in-flight pipeline + rx dummies on CS-rise.
|
||||
- `ExiCapture` flushes tx_fifo on CS-fall to clear prefetch over-push so a
|
||||
truncated DMA read can't leak stale bytes into the next transaction.
|
||||
Tested: register-file streaming read (SPRAM model, 12 bytes), ExiCapture
|
||||
over-push/flush, AND the full BBATop loop — a W5500 model delivers a frame →
|
||||
W5500 master RX → RXFrameAssembler writes the SPRAM ring → GC reads RWP then
|
||||
DMA-reads the descriptor+frame back (verified byte-for-byte).
|
||||
Note: a DMA read header must keep length-1 within the 2-bit field; the GC
|
||||
driver sets it ≤3 and clocks the real length via CS (the design streams
|
||||
until CS regardless). (EXI DMA *writes* are not implemented; the GC's
|
||||
DMA-write engine has a 1-bit-shift bug and Swiss avoids them — see
|
||||
design-doc §"EXI DMA bug".)
|
||||
- **S0_IR interrupt clear after RX**: ✅ done — `W5500SPIMaster` RX_CLR_IR state
|
||||
writes Sn_IR[2]=1 after RECV so `INT_N` deasserts (else the FSM would re-enter
|
||||
RX_CHECK forever on real hardware).
|
||||
- **Full-rate W5500 SPI (27 Mbit/s) — INVESTIGATED, NOT achievable on UP5K**:
|
||||
the W5500 SCK is sync÷2 = 12 MHz. Raising it needs the SPI engine on a ≥54 MHz
|
||||
clock, but a standalone synth of `W5500SPIMaster` in the capture domain closes
|
||||
only **40 MHz** — and the slack histogram shows the failure is *distributed*
|
||||
(~140 endpoints fail 54, incl. the `wbuf`/header mux feeding the shift
|
||||
register), NOT a single cuttable path. So the bottleneck is the **logic that
|
||||
operates the SPI device** (transaction FSM, byte sourcing), not the bit-bang.
|
||||
Consequences:
|
||||
- The "split the bit engine to capture + per-byte CDC handshake" idea nets
|
||||
only ~14 Mbit/s — the CDC round-trip ≈ the SPI byte time — not worth it.
|
||||
- A capture-domain "streaming executor" would still contain that distributed
|
||||
~40 MHz logic, so it wouldn't close 54 either.
|
||||
- Hardware `SB_SPI` wouldn't help (it only offloads the bit-bang, which was
|
||||
never the bottleneck) and is unsimulatable.
|
||||
- There is no usable clock between 24 (HFOSC) and 54 (the one PLL, needed at
|
||||
54 for the EXI front-end); PLL÷2 = 27 → SCK 13.5 MHz, a ~12% gain, not
|
||||
worth the fabric divider.
|
||||
Net: 12 Mbit/s is the practical W5500 ceiling on this part. It exceeds
|
||||
real-world GC BBA TCP throughput and is fine for chunked ROM streaming.
|
||||
Reaching 27 Mbit/s would need a faster FPGA or a much shallower W5500-operating
|
||||
redesign (uncertain) — **OR a parallel-bus ethernet chip (see W5100 below)**,
|
||||
which is the implemented solution for the ROM-streaming throughput target.
|
||||
|
||||
## W5100 vs W5500 ethernet back-end
|
||||
|
||||
The throughput insight: SPI serialises 8 bits/byte, so the W5500 byte rate is
|
||||
(operating-logic clock)/16 — that logic closes only ~40 MHz (< 54), so it stays on the 24 MHz `sync` clock → ~12
|
||||
Mbit/s. A **parallel** bus moves a whole byte per access, so the *same* ~24 MHz
|
||||
`sync` logic clears the 27 Mbit/s EXI ceiling (the real hard limit — the GC EXI
|
||||
bus tops out there). So `W5100ParallelMaster` is the throughput path and is now
|
||||
the `BBATop` default.
|
||||
|
||||
- **Interface:** W5100 **indirect parallel bus** (IDM). Only A[1:0] are wired
|
||||
(board ties A[14:2]=0 so a power-up direct access at A=00 still hits MR):
|
||||
`00`=MR, `01`=IDM_AR0(hi), `10`=IDM_AR1(lo), `11`=IDM_DR. A register/buffer
|
||||
access = write IDM_AR (the 16-bit address) then read/write IDM_DR. With MR.AI
|
||||
set, IDM_DR auto-increments → a multi-byte block is one address-set + a burst.
|
||||
- **Bus engine:** drives A + D with `/CS` and `/RD`|`/WR` asserted for
|
||||
`strobe_cycles` (default 3 ≈ 125 ns at 24 MHz, ≥ the W5100's ~80 ns access).
|
||||
DATA[7:0] is bidirectional → an SB_IO tristate (`bus_data_o`/`oe`/`i`).
|
||||
- **Pins (15):** A[1:0]=2, D[7:0]=8, /CS,/RD,/WR=3, /INT=1, /RST=1. With EXI (5)
|
||||
+ clk (1) = **21 of ~34 usable SG48 I/O** — comfortable. See `synth.py`.
|
||||
- **MR.AI requires init first:** unlike the W5500 (each SPI transaction is
|
||||
self-framed), the W5100's multi-byte accesses depend on MR.AI, so the init
|
||||
sequence (triggered by the GC's NCRA reset) MUST run before any TX/RX. The
|
||||
BBATop test issues NCRA-reset before its RX loop for this reason; on hardware
|
||||
the GC driver already does. (`BBATop(reset_cycles=N)` shrinks the MR settle
|
||||
wait for sim.)
|
||||
- **Ring wraparound is in fabric:** the W5100 does NOT auto-wrap the IDM address
|
||||
at the socket-buffer boundary (the W5500 did), so the streamer re-sets IDM_AR
|
||||
to the buffer base when the running address reaches the 2 KB boundary. Handled
|
||||
in the SW/SR/RB paths (`xfer_wrap`/`xfer_wbase`/`xfer_wend`/`cur_addr`); both
|
||||
TX and RX wrap cases are tested.
|
||||
- **Register map differs from the W5500:** common regs at 0x0000 (MR, SHAR 0x09,
|
||||
IMR 0x16, RMSR/TMSR 0x1A/0x1B), socket 0 at 0x0400 (S0_MR/CR/IR, TX_WR 0x424,
|
||||
RX_RSR 0x426, RX_RD 0x428), TX buffer 0x4000, RX buffer 0x6000. MACRAW mode.
|
||||
- **Status:** init/TX/RX (with wrap) verified vs a bus model; BBATop full
|
||||
W5100→SPRAM→GC RX loop passes byte-for-byte; synth PASS (slow ~32≥24, capture
|
||||
~56≥54, 42% LC). Register addresses/MR bits are from the datasheet (from
|
||||
memory) — **confirm at hardware bring-up**.
|
||||
|
||||
### `rebbarb/` — LED blink demo (unchanged)
|
||||
- `rebbarb.py` — blinks LEDs via a PLL (36 MHz), demonstrates `IceBreakerPlatform`
|
||||
- `debouncer.py` — `Debouncer(cycles)` — synchronous debounce, configurable hold
|
||||
- `toggle_button.py` — `ToggleButton` — edge-to-toggle state machine (wraps Debouncer)
|
||||
- `pulse_button.py` — `PulseButton` — single-cycle pulse on rising edge (wraps Debouncer)
|
||||
|
||||
These components are reusable building blocks. The `Debouncer` and button wrappers
|
||||
will be needed for any physical input in `exi_bba/`.
|
||||
|
||||
**Import note:** `rebbarb/` files use bare imports (`from debouncer import Debouncer`).
|
||||
Run them as `python rebbarb/<file>.py` from the workspace root so Python adds
|
||||
`rebbarb/` to `sys.path` automatically.
|
||||
|
||||
**Simulation at module level:** `toggle_button.py` and `pulse_button.py` run
|
||||
their simulations unconditionally (no `__main__` guard) — importing either file
|
||||
triggers a VCD write. New modules should guard simulation code with
|
||||
`if __name__ == "__main__":`.
|
||||
|
||||
`examples/amaranth_cdc.py` contains handwritten `SyncFF` and `TogglePulseSync`
|
||||
reference implementations — use `amaranth.lib.cdc` primitives (`FFSynchronizer`,
|
||||
`PulseSynchronizer`) in production code instead.
|
||||
|
||||
`hardware/sp1_test_plug/` — KiCad project for a physical SP1 edge-connector test
|
||||
plug (schematic, PCB, custom GameCube symbol library). Used to verify pad geometry
|
||||
before ordering the interposer PCB; not part of the FPGA build.
|
||||
|
||||
---
|
||||
|
||||
## Amaranth Simulator API
|
||||
|
||||
Two API generations are present in this repo:
|
||||
|
||||
| API | Where used | Status |
|
||||
|---|---|---|
|
||||
| `sim.add_testbench(async_fn)` + `await ctx.tick()` + `Period(MHz=n)` | `rebbarb/*.py` | **Use this for new code** |
|
||||
| `sim.add_sync_process(gen_fn)` + `sim.run_until(t)` | `examples/` | Old — reference only |
|
||||
|
||||
New modules should use the testbench API (`add_testbench`, `sim.write_vcd(ctx)`
|
||||
context manager). The old process API still works but is not idiomatic in current
|
||||
Amaranth.
|
||||
|
||||
**Critical testbench timing rule:** `ctx.get(signal)` reads signal values AFTER
|
||||
the clock edge (post-update registered values). Combinatorial signals that depend
|
||||
on registered signals that were updated by the SAME tick will already reflect the
|
||||
new registered values. For example: if `tx_sof = tx_bytes_r_rdy & is_first` and
|
||||
`is_first` is cleared synchronously on the first byte, then reading `tx_sof` after
|
||||
the first byte's tick always returns 0 — read BEFORE the tick instead.
|
||||
|
||||
**`ctx.set()` takes effect immediately** (combinatorial, not registered). Use it
|
||||
AFTER `await ctx.tick()` to prepare inputs for the NEXT tick.
|
||||
|
||||
The full design specification lives in `docs/gc_bba_fpga_design.md`.
|
||||
|
||||
---
|
||||
|
||||
## Key Architecture Decisions
|
||||
|
||||
- **No network stack in the FPGA.** The GC CPU runs TCP/IP. The FPGA is a dumb
|
||||
MAC bridge.
|
||||
- **Split-domain clocking — 3 domains, 2 sources (1 PLL + 1 HFOSC):**
|
||||
- `capture` — 54 MHz (PLL, DIVR=0 DIVF=71 DIVQ=4). Hosts ONLY the SPI Mode 3
|
||||
bit engine inside `ExiCapture`. 54 MHz = 2× the **real 27 MHz** EXI clock —
|
||||
the minimum oversampling for clean Mode 3. The isolated bit engine closes
|
||||
~91 MHz; integrated with the byte-FIFO read path the capture domain closes
|
||||
~62 MHz, so 54 passes with margin.
|
||||
- `exi` — 24 MHz (HFOSC ÷2). BBA register file / transaction FSM.
|
||||
- `sync` — 24 MHz (same HFOSC net as `exi`). SPRAM arbiter, RX/TX engines,
|
||||
W5100 parallel master (default) / W5500 SPI master (alternate).
|
||||
- **Why split:** only the tiny SPI bit engine needs a fast clock to sample
|
||||
27 MHz EXI. The bulky register-file/SPRAM/W5500 logic is routing-bound at
|
||||
~33–44 MHz on the UP5K and only needs the byte rate (27 MHz ÷ 8 ≈ 3.4 MHz).
|
||||
`ExiCapture` bridges capture↔exi with rx/tx byte AsyncFIFOs.
|
||||
- **EXI clock reality:** the GC EXI clock tops out at ~27 MHz. libogc's
|
||||
`EXI_SPEED32MHZ` is a nominal name — the real rate is 27 MHz. The old
|
||||
"96 MHz = 3× 32 MHz EXI" target was doubly wrong and unreachable on UP5K
|
||||
(which caps ~44 MHz for non-trivial logic).
|
||||
- **TX/MISO across the split:** the register file PROACTIVELY pushes read
|
||||
responses into the tx byte FIFO during the EXI clock-idle gap (the GC pauses
|
||||
the clock between an EXI_Imm header-write and the data-read). The bit engine
|
||||
drives MISO live from the FIFO head; see `ExiCapture` / `SPIMode3Slave`.
|
||||
- **All CDC via `amaranth.lib.cdc`.** Never pass raw multi-bit signals across
|
||||
domains. Use `FFSynchronizer` for slow single bits, `PulseSynchronizer` for
|
||||
events, `AsyncFIFO` for data streams, `ResetSynchronizer` for resets.
|
||||
- **Register file lives entirely in `exi` domain.** The `sync` domain only
|
||||
communicates through AsyncFIFOs and PulseSynchronizers — never direct register
|
||||
reads/writes.
|
||||
|
||||
---
|
||||
|
||||
## Critical Protocol Notes
|
||||
|
||||
### EXI / SPI Mode 3
|
||||
- CLK idles **HIGH** (CPOL=1, CPHA=1).
|
||||
- MOSI sampled on **falling** CLK edge. MISO driven on **rising** CLK edge.
|
||||
- Getting this wrong means the GC never enumerates the device.
|
||||
- CS is active **low**, delineates each transaction.
|
||||
|
||||
### EXI Transaction Header (2 bytes before data)
|
||||
```
|
||||
Byte 0: [7]=write_flag [6:0]=addr[12:6]
|
||||
Byte 1: [7:2]=addr[5:0] [1:0]=xfer_len-1 (0=1B … 3=4B)
|
||||
```
|
||||
Full address = 13 bits → 0x0000–0x1FFF.
|
||||
|
||||
### Device ID Query
|
||||
On power-on the GC writes `0x0000` (2 bytes) then reads 4 bytes.
|
||||
Must return: `0x04 0x02 0x02 0x00`.
|
||||
|
||||
---
|
||||
|
||||
## Memory Map (abridged)
|
||||
|
||||
| Range | Region |
|
||||
|---|---|
|
||||
| 0x0000–0x0033 | MAC control registers (register file, exi domain) |
|
||||
| 0x0048 | TXDATA — bulk TX data port (→ `tx_bytes` AsyncFIFO) |
|
||||
| 0x0100–0x0FFF | RX ring buffer in SPRAM (15 × 256-byte pages, pages 1–15) |
|
||||
| 0x0100–0x1FFF | any read ≥ 0x0100 streams from SPRAM (DMA path); the ring proper is pages 1–15 above |
|
||||
|
||||
---
|
||||
|
||||
## Key Registers
|
||||
|
||||
| Addr | Name | Notes |
|
||||
|---|---|---|
|
||||
| 0x00 | NCRA | [0]=RESET self-clears; pulses `ncra_rst` to sync domain |
|
||||
| 0x08 | IMR | Interrupt mask |
|
||||
| 0x09 | IR | Write-1-to-clear. [1]=RI, [2]=TI. INT_N asserts when IR & IMR ≠ 0 |
|
||||
| 0x16–17 | RWP | RX write pointer — updated by sync domain via `rx_wptr` FIFO |
|
||||
| 0x18–19 | RRP | RX read pointer — GC writes after consuming frames |
|
||||
| 0x20–25 | PAR0–5 | MAC address; also forwarded to the ethernet back-end (W5100 or W5500) as SHAR |
|
||||
| 0x31 | NWAYS | Hardcode **0x17** (100M full-duplex link up, autoneg complete) |
|
||||
| 0x3A | HIPR | Hardcode **0x01** (BBA present) |
|
||||
| 0x48 | TXDATA | GC streams TX frame bytes here |
|
||||
|
||||
---
|
||||
|
||||
## Module Breakdown
|
||||
|
||||
| Module | Domain | File |
|
||||
|---|---|---|
|
||||
| `BBATop` | all | `exi_bba/bba_top.py` |
|
||||
| `ExiCapture` | capture (+exi FIFOs) | `exi_bba/exi_capture.py` |
|
||||
| `SPIMode3Slave` | capture (param `domain`) | `exi_bba/spi_mode3_slave.py` |
|
||||
| `BBARegisterFile` | exi (+FIFO to sync) | `exi_bba/bba_register_file.py` |
|
||||
| `SPRAMArbiter` | sync | `exi_bba/spram_arbiter.py` |
|
||||
| `RXFrameAssembler` | sync | `exi_bba/rx_frame_assembler.py` |
|
||||
| `TXFrameDrain` | sync | `exi_bba/tx_frame_drain.py` |
|
||||
| `W5100ParallelMaster` | sync | `exi_bba/w5100_parallel_master.py` (default eth) |
|
||||
| `W5500SPIMaster` | sync | `exi_bba/w5500_spi_master.py` (alt eth) |
|
||||
| `EEPROMModel` | exi | `exi_bba/eeprom_model.py` |
|
||||
|
||||
`ExiCapture` wraps `SPIMode3Slave` (in the fast `capture` domain) plus the
|
||||
capture↔exi rx/tx byte AsyncFIFOs. `BBARegisterFile` consumes the rx byte
|
||||
stream and proactively pushes read responses into the tx byte FIFO — it no
|
||||
longer sees the per-bit SPI cadence (that lives entirely in `capture`).
|
||||
|
||||
---
|
||||
|
||||
## CDC Signal Inventory
|
||||
|
||||
| Signal | Direction | Primitive |
|
||||
|---|---|---|
|
||||
| EXI CLK / MOSI / CS pins | async → capture | `FFSynchronizer` (stages=2) |
|
||||
| RX byte stream (capture→core) | capture → exi | `AsyncFIFO` 8-bit, depth=4 |
|
||||
| TX byte stream (core→capture) | exi → capture | `AsyncFIFO` 8-bit, depth=2 |
|
||||
| cs_active (transaction in progress) | capture → exi | `FFSynchronizer` (DMA read length) |
|
||||
| SPRAM read request (addr) | exi → sync | `AsyncFIFO` 16-bit, depth=4 |
|
||||
| SPRAM read result (data) | sync → exi | `AsyncFIFO` 8-bit, depth=4 |
|
||||
| TX packet bytes | exi → sync | `AsyncFIFO` 8-bit, depth=16 |
|
||||
| TX frame length | exi → sync | `AsyncFIFO` 16-bit, depth=4 |
|
||||
| RX frame bytes | sync → SPRAM | `RXFrameAssembler` → `SPRAMArbiter` (not a byte FIFO; the GC reads frames back out of SPRAM via the SPRAM read req/rsp FIFOs) |
|
||||
| RWP update | sync → exi | `AsyncFIFO` 8-bit, depth=4 |
|
||||
| RRP update | exi → sync | `AsyncFIFO` 8-bit, depth=4 |
|
||||
| RX ready (IR[RI]) | sync → exi | `PulseSynchronizer` |
|
||||
| TX done (IR[TI]) | sync → exi | `PulseSynchronizer` |
|
||||
| NCRA reset pulse | exi → sync | `PulseSynchronizer` |
|
||||
|
||||
---
|
||||
|
||||
## W5500 Configuration (on NCRA reset)
|
||||
|
||||
The W5500 selects the register **block** via the BSB field of the control byte,
|
||||
NOT via the address — so register addresses below are **block offsets**, not flat
|
||||
0x4000-style addresses (see `_W5500_*` and `_CTRL_*` in `w5500_spi_master.py`).
|
||||
```
|
||||
1. Write MR = 0x80 (common block, offset 0x0000) software reset
|
||||
2. Wait ~1 ms
|
||||
3. Write SHAR = MAC (common block, offset 0x0009, 6 bytes from PAR0–5)
|
||||
4. Write S0_MR = 0x04 (socket-0 reg block, offset 0x0000) MACRAW
|
||||
5. Write S0_CR = 0x01 (socket-0 reg block, offset 0x0001) OPEN
|
||||
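6. Write S0_IMR = 0x05   (socket-0 reg block, offset 0x002C)  RECV | SEND_OK   <- TODO verify: in the W5500 datasheet RECV is bit 2 and SEND_OK is bit 4 (RECV | SEND_OK = 0x14; 0x05 = RECV | CON)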
|
||||
```
|
||||
|
||||
W5500 SPI is **Mode 0** (CPOL=0 CPHA=0); SCK = **12 MHz** (the 24 MHz `sync`
|
||||
domain ÷ 2 via a toggle clock-enable). Connect W5500 `INT_N` to an FPGA input
|
||||
for low-latency RX detection. (The W5500 is the alternate back-end; the W5100
|
||||
parallel master is the default — see "W5100 vs W5500".)
|
||||
|
||||
---
|
||||
|
||||
## Physical Interface (SP1 Edge Connector)
|
||||
|
||||
- PCB must be **1.2 mm thick, ENIG finish**.
|
||||
- Staggered (not mirrored) top/bottom contact rows — same geometry as PCI/ISA.
|
||||
- Derive exact pad geometry from **SP1ETH KiCad project** (silverstee1/SP1ETH),
|
||||
cross-referenced with ETH2SP1 (LaserBear). Do not rely on YAGCD alone.
|
||||
- Add **100 µF bulk cap** on the interposer near FPGA power pins (3.3 V budget
|
||||
is tight: iCEbreaker ~80 mA + W5500 ~150 mA ≈ 230 mA).
|
||||
- **Pin 5 is 12 V — do not connect to FPGA I/O.** Test point or leave open.
|
||||
- `EXTIN` (pin 1): tie to 3.3 V via 10 kΩ — required for GC device enumeration.
|
||||
- All signal levels are 3.3 V. No level shifting needed.
|
||||
|
||||
---
|
||||
|
||||
## SPRAM Notes
|
||||
|
||||
- iCE40UP5K has 128 KB of SPRAM: four SB_SPRAM256KA blocks, each 16K × 16 bit (32 KB).
|
||||
- **1-cycle synchronous read latency** — result of read at cycle N is valid at N+1.
|
||||
- Byte writes via `MASKWREN`: lower byte = `0b0011`, upper byte = `0b1100`.
|
||||
- Address to SPRAM = byte_address >> 1.
|
||||
- ETH writes take priority over EXI reads in the arbiter (safe by ring-buffer
|
||||
invariant: GC only reads pages the ETH engine has already finished).
|
||||
|
||||
---
|
||||
|
||||
## GC Initialisation Sequence (Swiss/BBA driver)
|
||||
|
||||
```
|
||||
1. Write 0x0000 × 2, read 4 B → must get 0x04020200 (device ID)
|
||||
2. Write NCRA = 0x01 (reset, self-clears; resets W5500 + SPRAM ptrs)
|
||||
3. Poll NCRA bit 0 until 0 (wait reset complete)
|
||||
4. Write PAR0–5 (MAC address)
|
||||
5. Write MAR0–7 = 0xFF (promiscuous multicast)
|
||||
6. Write ANALOG = 0xD6 (enable PHY — no FPGA effect, just store)
|
||||
7. Write NWAYC (autoneg config — store only)
|
||||
8. Write IMR = 0x86 (enable RBFI | TI | RI interrupts)
|
||||
9. Write GCA (AUTOPUB bit)
|
||||
10. Write NCRA SR bit = 0x08 (start receive)
|
||||
11. Poll NWAYS until link up → return hardcoded 0x17 immediately
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Notes & Gotchas
|
||||
|
||||
- **`NWAYS` must return `0x17` always.** GC polls it to confirm 100 Mbps link
|
||||
before enabling RX. Do not attempt to reflect real W5500 link status.
|
||||
- **`EEPROMModel` can be stubbed initially.** Many GC BBA drivers write their own
|
||||
MAC to PAR0–5 rather than using the EEPROM. Pre-populate PAR0–5 reset state
|
||||
with a valid Nintendo OUI MAC (`00:09:BF:xx:xx:xx`).
|
||||
- **`tx_load` timing in `SPIMode3Slave`:** pulses at CS assertion (first byte)
|
||||
and after each complete received byte. Upstream must register next TX byte
|
||||
within one `exi` clock.
|
||||
- **PLL target 54 MHz**: verify with `icepll -i 12 -o 54` (DIVR=0 DIVF=71 DIVQ=4)
|
||||
before coding PLL parameters; the capture-domain bit engine oversamples the
|
||||
27 MHz EXI clock 2×.
|
||||
- **TX buffer selection (NCRA ST bits):** Ignore buffer select (ST1 vs ST0).
|
||||
Treat any non-zero ST as a TX trigger.
|
||||
- **If nextpnr fails capture-domain timing at 54 MHz:** the isolated bit engine
|
||||
closes ~91 MHz, so 54 has margin; if a seed fails, sweep seeds
|
||||
(`synth.py --seeds N`) or instruct users to configure Swiss to a lower EXI
|
||||
clock index.
|
||||
-195
@@ -1,195 +0,0 @@
|
||||
$comment Generated by Amaranth $end
|
||||
$date 2025-09-20 22:27:02.816595 $end
|
||||
$timescale 1 fs $end
|
||||
$scope module bench $end
|
||||
$scope module top $end
|
||||
$var wire 1 ! clk $end
|
||||
$var wire 1 " rst $end
|
||||
$var wire 1 # i $end
|
||||
$var wire 1 $ i$3 $end
|
||||
$var wire 14 % counter $end
|
||||
$var wire 1 & o $end
|
||||
$var wire 1 ' o$6 $end
|
||||
$var wire 1 ( last_seen $end
|
||||
$scope module U$0 $end
|
||||
$var wire 1 ! clk $end
|
||||
$var wire 1 " rst $end
|
||||
$var wire 1 # i $end
|
||||
$var wire 1 ' o $end
|
||||
$var wire 1 ) prevInValid $end
|
||||
$var wire 14 * count $end
|
||||
$var wire 1 + state $end
|
||||
$var wire 1 , prevIn $end
|
||||
$upscope $end
|
||||
$upscope $end
|
||||
$upscope $end
|
||||
$enddefinitions $end
|
||||
#0
|
||||
$dumpvars
|
||||
0!
|
||||
0"
|
||||
0#
|
||||
0$
|
||||
b0 %
|
||||
0&
|
||||
0'
|
||||
0(
|
||||
0)
|
||||
b10011100010000 *
|
||||
0+
|
||||
0,
|
||||
$end
|
||||
#500000000
|
||||
1!
|
||||
1)
|
||||
b0 *
|
||||
#1000000000
|
||||
0!
|
||||
#1500000000
|
||||
1!
|
||||
1$
|
||||
1#
|
||||
#2000000000
|
||||
0!
|
||||
#2500000000
|
||||
1!
|
||||
1+
|
||||
1,
|
||||
b10011100010000 *
|
||||
1'
|
||||
#3000000000
|
||||
0!
|
||||
#3500000000
|
||||
1!
|
||||
1&
|
||||
1(
|
||||
#4000000000
|
||||
0!
|
||||
#4500000000
|
||||
1!
|
||||
0&
|
||||
b10011100010000 %
|
||||
#5000000000
|
||||
0!
|
||||
#5500000000
|
||||
1!
|
||||
b10011100001111 %
|
||||
#6000000000
|
||||
0!
|
||||
#6500000000
|
||||
1!
|
||||
b10011100001110 %
|
||||
0$
|
||||
0#
|
||||
#7000000000
|
||||
0!
|
||||
#7500000000
|
||||
1!
|
||||
0,
|
||||
b10011100001111 *
|
||||
b10011100001101 %
|
||||
#8000000000
|
||||
0!
|
||||
#8500000000
|
||||
1!
|
||||
b10011100001110 *
|
||||
b10011100001100 %
|
||||
#9000000000
|
||||
0!
|
||||
#9500000000
|
||||
1!
|
||||
b10011100001101 *
|
||||
b10011100001011 %
|
||||
#10000000000
|
||||
0!
|
||||
#10500000000
|
||||
1!
|
||||
b10011100001100 *
|
||||
b10011100001010 %
|
||||
#11000000000
|
||||
0!
|
||||
#11500000000
|
||||
1!
|
||||
b10011100001011 *
|
||||
b10011100001001 %
|
||||
1$
|
||||
1#
|
||||
#12000000000
|
||||
0!
|
||||
#12500000000
|
||||
1!
|
||||
1,
|
||||
b10011100010000 *
|
||||
b10011100001000 %
|
||||
#13000000000
|
||||
0!
|
||||
#13500000000
|
||||
1!
|
||||
b10011100000111 %
|
||||
#14000000000
|
||||
0!
|
||||
#14500000000
|
||||
1!
|
||||
b10011100000110 %
|
||||
#15000000000
|
||||
0!
|
||||
#15500000000
|
||||
1!
|
||||
b10011100000101 %
|
||||
#16000000000
|
||||
0!
|
||||
#16500000000
|
||||
1!
|
||||
b10011100000100 %
|
||||
0$
|
||||
0#
|
||||
#17000000000
|
||||
0!
|
||||
#17500000000
|
||||
1!
|
||||
0,
|
||||
b10011100001111 *
|
||||
b10011100000011 %
|
||||
#18000000000
|
||||
0!
|
||||
#18500000000
|
||||
1!
|
||||
b10011100001110 *
|
||||
b10011100000010 %
|
||||
#19000000000
|
||||
0!
|
||||
#19500000000
|
||||
1!
|
||||
b10011100001101 *
|
||||
b10011100000001 %
|
||||
#20000000000
|
||||
0!
|
||||
#20500000000
|
||||
1!
|
||||
b10011100001100 *
|
||||
b10011100000000 %
|
||||
#21000000000
|
||||
0!
|
||||
#21500000000
|
||||
1!
|
||||
b10011100001011 *
|
||||
b10011011111111 %
|
||||
#22000000000
|
||||
0!
|
||||
#22500000000
|
||||
1!
|
||||
b10011100001010 *
|
||||
b10011011111110 %
|
||||
#23000000000
|
||||
0!
|
||||
#23500000000
|
||||
1!
|
||||
b10011100001001 *
|
||||
b10011011111101 %
|
||||
#24000000000
|
||||
0!
|
||||
#24500000000
|
||||
1!
|
||||
b10011100001000 *
|
||||
b10011011111100 %
|
||||
#25000000000
|
||||
@@ -1,171 +0,0 @@
|
||||
$comment Generated by Amaranth $end
|
||||
$date 2025-09-20 22:27:02.809849 $end
|
||||
$timescale 1 fs $end
|
||||
$scope module bench $end
|
||||
$scope module top $end
|
||||
$var wire 1 ! clk $end
|
||||
$var wire 1 " rst $end
|
||||
$var wire 1 # i $end
|
||||
$var wire 1 $ i$3 $end
|
||||
$var wire 1 % o $end
|
||||
$var wire 1 & last_seen $end
|
||||
$var wire 1 ' o$6 $end
|
||||
$scope module U$0 $end
|
||||
$var wire 1 ! clk $end
|
||||
$var wire 1 " rst $end
|
||||
$var wire 1 # i $end
|
||||
$var wire 1 % o $end
|
||||
$var wire 1 ( prevInValid $end
|
||||
$var wire 14 ) count $end
|
||||
$var wire 1 * state $end
|
||||
$var wire 1 + prevIn $end
|
||||
$upscope $end
|
||||
$upscope $end
|
||||
$upscope $end
|
||||
$enddefinitions $end
|
||||
#0
|
||||
$dumpvars
|
||||
0!
|
||||
0"
|
||||
0#
|
||||
0$
|
||||
0%
|
||||
0&
|
||||
0'
|
||||
0(
|
||||
b10011100010000 )
|
||||
0*
|
||||
0+
|
||||
$end
|
||||
#500000000
|
||||
1!
|
||||
b0 )
|
||||
1(
|
||||
#1000000000
|
||||
0!
|
||||
#1500000000
|
||||
1!
|
||||
1$
|
||||
1#
|
||||
#2000000000
|
||||
0!
|
||||
#2500000000
|
||||
1!
|
||||
b10011100010000 )
|
||||
1*
|
||||
1+
|
||||
1%
|
||||
#3000000000
|
||||
0!
|
||||
#3500000000
|
||||
1!
|
||||
1&
|
||||
1'
|
||||
#4000000000
|
||||
0!
|
||||
#4500000000
|
||||
1!
|
||||
#5000000000
|
||||
0!
|
||||
#5500000000
|
||||
1!
|
||||
#6000000000
|
||||
0!
|
||||
#6500000000
|
||||
1!
|
||||
0$
|
||||
0#
|
||||
#7000000000
|
||||
0!
|
||||
#7500000000
|
||||
1!
|
||||
b10011100001111 )
|
||||
0+
|
||||
#8000000000
|
||||
0!
|
||||
#8500000000
|
||||
1!
|
||||
b10011100001110 )
|
||||
#9000000000
|
||||
0!
|
||||
#9500000000
|
||||
1!
|
||||
b10011100001101 )
|
||||
#10000000000
|
||||
0!
|
||||
#10500000000
|
||||
1!
|
||||
b10011100001100 )
|
||||
#11000000000
|
||||
0!
|
||||
#11500000000
|
||||
1!
|
||||
b10011100001011 )
|
||||
1$
|
||||
1#
|
||||
#12000000000
|
||||
0!
|
||||
#12500000000
|
||||
1!
|
||||
b10011100010000 )
|
||||
1+
|
||||
#13000000000
|
||||
0!
|
||||
#13500000000
|
||||
1!
|
||||
#14000000000
|
||||
0!
|
||||
#14500000000
|
||||
1!
|
||||
#15000000000
|
||||
0!
|
||||
#15500000000
|
||||
1!
|
||||
#16000000000
|
||||
0!
|
||||
#16500000000
|
||||
1!
|
||||
0$
|
||||
0#
|
||||
#17000000000
|
||||
0!
|
||||
#17500000000
|
||||
1!
|
||||
b10011100001111 )
|
||||
0+
|
||||
#18000000000
|
||||
0!
|
||||
#18500000000
|
||||
1!
|
||||
b10011100001110 )
|
||||
#19000000000
|
||||
0!
|
||||
#19500000000
|
||||
1!
|
||||
b10011100001101 )
|
||||
#20000000000
|
||||
0!
|
||||
#20500000000
|
||||
1!
|
||||
b10011100001100 )
|
||||
#21000000000
|
||||
0!
|
||||
#21500000000
|
||||
1!
|
||||
b10011100001011 )
|
||||
#22000000000
|
||||
0!
|
||||
#22500000000
|
||||
1!
|
||||
b10011100001010 )
|
||||
#23000000000
|
||||
0!
|
||||
#23500000000
|
||||
1!
|
||||
b10011100001001 )
|
||||
#24000000000
|
||||
0!
|
||||
#24500000000
|
||||
1!
|
||||
b10011100001000 )
|
||||
#25000000000
|
||||
Vendored
+1
@@ -0,0 +1 @@
|
||||
{}
|
||||
+1
@@ -0,0 +1 @@
|
||||
{}
|
||||
+33
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"file-explorer": true,
|
||||
"global-search": true,
|
||||
"switcher": true,
|
||||
"graph": true,
|
||||
"backlink": true,
|
||||
"canvas": true,
|
||||
"outgoing-link": true,
|
||||
"tag-pane": true,
|
||||
"footnotes": false,
|
||||
"properties": false,
|
||||
"page-preview": true,
|
||||
"daily-notes": true,
|
||||
"templates": true,
|
||||
"note-composer": true,
|
||||
"command-palette": true,
|
||||
"slash-command": false,
|
||||
"editor-status": true,
|
||||
"bookmarks": true,
|
||||
"markdown-importer": false,
|
||||
"zk-prefixer": false,
|
||||
"random-note": false,
|
||||
"outline": true,
|
||||
"word-count": true,
|
||||
"slides": false,
|
||||
"audio-recorder": false,
|
||||
"workspaces": false,
|
||||
"file-recovery": true,
|
||||
"publish": false,
|
||||
"sync": true,
|
||||
"bases": true,
|
||||
"webviewer": false
|
||||
}
|
||||
+167
@@ -0,0 +1,167 @@
|
||||
{
|
||||
"main": {
|
||||
"id": "6eef6b982305e97c",
|
||||
"type": "split",
|
||||
"children": [
|
||||
{
|
||||
"id": "ef28aa54abb02b7c",
|
||||
"type": "tabs",
|
||||
"children": [
|
||||
{
|
||||
"id": "dd2aafdfa4873c3e",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "empty",
|
||||
"state": {},
|
||||
"icon": "lucide-file",
|
||||
"title": "New tab"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"direction": "vertical"
|
||||
},
|
||||
"left": {
|
||||
"id": "7dcb0dd958c47669",
|
||||
"type": "split",
|
||||
"children": [
|
||||
{
|
||||
"id": "5addbd6c8b989a49",
|
||||
"type": "tabs",
|
||||
"children": [
|
||||
{
|
||||
"id": "10f89da0d72538c0",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "file-explorer",
|
||||
"state": {
|
||||
"sortOrder": "alphabetical",
|
||||
"autoReveal": false
|
||||
},
|
||||
"icon": "lucide-folder-closed",
|
||||
"title": "Files"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "476834a62536c756",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "search",
|
||||
"state": {
|
||||
"query": "",
|
||||
"matchingCase": false,
|
||||
"explainSearch": false,
|
||||
"collapseAll": false,
|
||||
"extraContext": false,
|
||||
"sortOrder": "alphabetical"
|
||||
},
|
||||
"icon": "lucide-search",
|
||||
"title": "Search"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "ce54c42efc557a72",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "bookmarks",
|
||||
"state": {},
|
||||
"icon": "lucide-bookmark",
|
||||
"title": "Bookmarks"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"direction": "horizontal",
|
||||
"width": 300
|
||||
},
|
||||
"right": {
|
||||
"id": "87b1d8f1ca08108d",
|
||||
"type": "split",
|
||||
"children": [
|
||||
{
|
||||
"id": "69cbc257ba71f388",
|
||||
"type": "tabs",
|
||||
"children": [
|
||||
{
|
||||
"id": "739632e6a61f8d8e",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "backlink",
|
||||
"state": {
|
||||
"collapseAll": false,
|
||||
"extraContext": false,
|
||||
"sortOrder": "alphabetical",
|
||||
"showSearch": false,
|
||||
"searchQuery": "",
|
||||
"backlinkCollapsed": false,
|
||||
"unlinkedCollapsed": true
|
||||
},
|
||||
"icon": "links-coming-in",
|
||||
"title": "Backlinks"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "e20c6e67aeb6eacb",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "outgoing-link",
|
||||
"state": {
|
||||
"linksCollapsed": false,
|
||||
"unlinkedCollapsed": true
|
||||
},
|
||||
"icon": "links-going-out",
|
||||
"title": "Outgoing links"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "858ad7c8f3ac4d90",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "tag",
|
||||
"state": {
|
||||
"sortOrder": "frequency",
|
||||
"useHierarchy": true,
|
||||
"showSearch": false,
|
||||
"searchQuery": ""
|
||||
},
|
||||
"icon": "lucide-tags",
|
||||
"title": "Tags"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "661ea018f1aa1171",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "outline",
|
||||
"state": {
|
||||
"followCursor": false,
|
||||
"showSearch": false,
|
||||
"searchQuery": ""
|
||||
},
|
||||
"icon": "lucide-list",
|
||||
"title": "Outline"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"direction": "horizontal",
|
||||
"width": 300,
|
||||
"collapsed": true
|
||||
},
|
||||
"left-ribbon": {
|
||||
"hiddenItems": {
|
||||
"switcher:Open quick switcher": false,
|
||||
"graph:Open graph view": false,
|
||||
"canvas:Create new canvas": false,
|
||||
"daily-notes:Open today's daily note": false,
|
||||
"templates:Insert template": false,
|
||||
"command-palette:Open command palette": false,
|
||||
"bases:Create new base": false
|
||||
}
|
||||
},
|
||||
"active": "dd2aafdfa4873c3e",
|
||||
"lastOpenFiles": []
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
This project attempts to emulate the GameCube Broadband Adapter (BBA) in an FPGA. The following things need to happen:
|
||||
|
||||
- [x] [[Amaranth-Hdl project setup]]
|
||||
- [x] Setup venv
|
||||
- [x] Install packages
|
||||
- [x] Flash Blinky on icebreaker
|
||||
- [ ] Figuring out how to deal with [[external clocks]].
|
||||
- [x] How to get a clock faster than 12 MHz, which is needed to interface with the 32 MHz EXI bus
|
||||
- [x] PLL configured to 48 MHz
|
||||
- [ ] ~~48 MHz oscillator onboard?~~
|
||||
- [ ] Check if Clock Domain Crossing is possible.
|
||||
- [ ] Oversampling approach was tedious but worked
|
||||
- [ ] Interfacing with [[GameCube]]
|
||||
- [ ] Figuring pinout of SP1.
|
||||
- [ ] Unofficial gamecube docs?
|
||||
- [ ] Make sure connecting [[SP1]] to IceBreaker is safe.
|
||||
- [ ] Can we power the FPGA with the SP1?
|
||||
- [ ] How much voltage do we get from SP1.
|
||||
- [ ] How much current can we source?
|
||||
- [ ] Figuring out basic [[EXI protocol]]
|
||||
- [ ] What is the structure of the messages?
|
||||
- [ ] How to know how long the message is
|
||||
- [ ] Integrity checks?
|
||||
- [ ] How fast do we need to respond to a message.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,107 @@
|
||||
from amaranth import *
|
||||
from amaranth.sim import Simulator
|
||||
|
||||
|
||||
class SyncFF(Elaboratable):
    """Level synchronizer moving a `width`-bit value from `src_domain` to `dst_domain`.

    The input is registered once in the source domain and then passed through
    two flip-flops clocked by the destination domain. It is meant for level
    signals that stay stable over several destination cycles; single-cycle
    pulses should go through TogglePulseSync instead.

    Attributes:
        src: value to transfer, sampled in `src_domain`.
        dst: synchronized copy of `src`, driven from the last `dst_domain` flop.
    """

    def __init__(self, width=1, src_domain="src", dst_domain="dst"):
        self.width = width
        self.src_domain = src_domain
        self.dst_domain = dst_domain
        self.src = Signal(self.width)
        self.dst = Signal(self.width)

    def elaborate(self, platform):
        m = Module()

        # Launch register (source clock) followed by the two capture flops
        # (destination clock).
        reg_src = Signal(self.width)
        ff0 = Signal(self.width)
        ff1 = Signal(self.width)

        m.d[self.src_domain] += reg_src.eq(self.src)
        m.d[self.dst_domain] += [
            ff0.eq(reg_src),
            ff1.eq(ff0),
        ]
        m.d.comb += self.dst.eq(ff1)

        return m
|
||||
|
||||
|
||||
class TogglePulseSync(Elaboratable):
    """Transfer single-cycle pulses from `src_domain` into `dst_domain`.

    - The source side flips an internal `toggle` register on every cycle in
      which `src_pulse` is high.
    - The destination side passes `toggle` through two flip-flops and raises
      `dst_pulse` for one destination cycle whenever the synchronized level
      changes.

    NOTE(review): a pulse is only observed if the toggle level is sampled by the
    destination clock before the next source pulse flips it back; back-to-back
    source pulses faster than the destination can sample will cancel out.

    Attributes:
        src_pulse: event input, sampled in `src_domain`.
        dst_pulse: one-cycle event output in `dst_domain` (registered edge detect).
    """

    def __init__(self, src_domain="src", dst_domain="dst"):
        self.src_domain = src_domain
        self.dst_domain = dst_domain
        self.src_pulse = Signal()
        self.dst_pulse = Signal()

    def elaborate(self, platform):
        m = Module()

        toggle = Signal()
        sync0 = Signal()
        sync1 = Signal()
        prev = Signal()
        edge = Signal()

        # Source domain: flip the toggle when a pulse arrives.
        # Conditional assignment must use the Module's `m.If` context manager;
        # there is no free-standing `If(...)` statement in Amaranth.
        with m.If(self.src_pulse):
            m.d[self.src_domain] += toggle.eq(~toggle)

        m.d[self.dst_domain] += [
            # Two-flop synchronizer for the toggle level.
            sync0.eq(toggle),
            sync1.eq(sync0),
            # Registered edge detect: any change of the synchronized level is
            # one event.
            edge.eq(sync1 ^ prev),
            prev.eq(sync1),
        ]
        m.d.comb += self.dst_pulse.eq(edge)

        return m
|
||||
|
||||
|
||||
def _sim_toggle_pulse():
    """Drive three pulses into a TogglePulseSync and print each observed `dst_pulse`.

    The stimulus process runs on the "src" clock; `dst_pulse` is therefore
    sampled at source-clock ticks.
    """
    top = Module()
    sync = TogglePulseSync(src_domain="src", dst_domain="dst")
    top.submodules.t = sync

    sim = Simulator(top)
    # Two unrelated clock periods (chosen arbitrarily for the simulation).
    sim.add_clock(1e-6, domain="src")
    sim.add_clock(1.5e-6, domain="dst")

    def idle(cycles):
        # Let `cycles` clock ticks pass without touching any signal.
        for _ in range(cycles):
            yield

    def process():
        # Short warm-up before the first event.
        yield from idle(5)

        for _ in range(3):
            # One-cycle pulse on the source side.
            yield sync.src_pulse.eq(1)
            yield
            yield sync.src_pulse.eq(0)

            # Watch the destination output for the next ten ticks.
            for _ in range(10):
                detected = yield sync.dst_pulse
                if detected:
                    print(f"dst detected pulse at sim tick")
                yield

        # Keep running a little longer to observe the tail.
        yield from idle(20)

    sim.add_sync_process(process, domain="src")
    sim.run_until(100e-6)


if __name__ == "__main__":
    _sim_toggle_pulse()
|
||||
@@ -0,0 +1,182 @@
|
||||
from amaranth import *
|
||||
from amaranth.sim import Simulator
|
||||
|
||||
|
||||
def bin_to_gray(x):
    """Return the reflected-binary (Gray) code of `x`: each bit XORed with the bit above it."""
    shifted = x >> 1
    return shifted ^ x
|
||||
|
||||
|
||||
def gray_to_bin(g, width):
    """Convert the low `width` bits of Gray-coded `g` back to plain binary.

    Walks from the most significant bit down; each binary bit is the XOR of
    all Gray bits at or above that position. Bits of `g` at position `width`
    and above are ignored. Returns 0 when `width` is 0 or negative.
    """
    result = 0
    running = 0  # binary bit produced at the previous (higher) position
    for pos in reversed(range(width)):
        running ^= (g >> pos) & 1
        result |= running << pos
    return result
|
||||
|
||||
|
||||
class AsyncFIFO(Elaboratable):
    """Parameterizable gray-pointer dual-clock FIFO.

    - width: data width in bits
    - depth: must be a power of two (and at least 1)
    - wdomain: write (source) domain name
    - rdomain: read (destination) domain name

    Write side (`wdomain`):
        wdata:  data to store.
        w_en:   request a write this cycle; ignored while `w_full` is high.
        w_full: high when no free slot is visible to the write side.

    Read side (`rdomain`):
        r_en:    request a read this cycle; ignored while `r_empty` is high.
        r_empty: high when no stored entry is visible to the read side.
        r_valid: high for one cycle, the cycle after an accepted read.
        rdata:   the entry fetched by the most recent accepted read; valid
                 together with `r_valid`.

    Each side sees the opposite pointer through a two-flop synchronizer, so
    `w_full` and `r_empty` may stay asserted slightly longer than strictly
    necessary, but never deassert early.
    """

    def __init__(self, width=1, depth=16, wdomain="src", rdomain="dst"):
        # `0 & -1 == 0`, so zero must be rejected explicitly.
        assert depth > 0 and depth & (depth - 1) == 0
        self.width = width
        self.depth = depth
        self.aw = (depth - 1).bit_length()  # address width
        self.wdomain = wdomain
        self.rdomain = rdomain

        # write-side interface
        self.wdata = Signal(width)
        self.w_en = Signal()
        self.w_full = Signal()

        # read-side interface
        self.rdata = Signal(width)
        self.r_en = Signal()
        self.r_valid = Signal()
        self.r_empty = Signal()

    def elaborate(self, platform):
        m = Module()
        aw = self.aw

        mem = Memory(width=self.width, depth=self.depth)
        wp = mem.write_port(domain=self.wdomain)
        rp = mem.read_port(domain=self.rdomain, transparent=False)
        m.submodules += [wp, rp]

        # pointers are AW+1 bits (extra MSB distinguishes full from empty)
        wbin = Signal(aw + 1)
        wgray = Signal(aw + 1)
        rbin = Signal(aw + 1)
        rgray = Signal(aw + 1)

        # opposing-domain gray pointers after two synchronizer flops
        rgray_sync0 = Signal(aw + 1)
        rgray_sync1 = Signal(aw + 1)
        wgray_sync0 = Signal(aw + 1)
        wgray_sync1 = Signal(aw + 1)

        # The FIFO is full when the binary pointers differ by exactly `depth`.
        # In gray code that is "top two bits inverted, the rest equal"; with a
        # one-bit pointer (depth 1) it is just the single bit inverted.
        full_mask = (0b11 << (aw - 1)) if aw >= 1 else 0b1

        # ── write domain ──────────────────────────────────────────────
        next_wbin = Signal(aw + 1)
        next_wgray = Signal(aw + 1)
        do_write = Signal()

        m.d.comb += [
            next_wbin.eq(wbin + 1),
            next_wgray.eq(next_wbin ^ (next_wbin >> 1)),
            # Full is judged on the current pointer so every slot is usable.
            self.w_full.eq(wgray == (rgray_sync1 ^ full_mask)),
            do_write.eq(self.w_en & ~self.w_full),
            # Drive the port combinationally: the store happens on the same
            # clock edge that publishes the advanced pointer.
            # Python slices are [low:high), so the address is wbin[:aw].
            wp.addr.eq(wbin[:aw]),
            wp.data.eq(self.wdata),
            wp.en.eq(do_write),
        ]
        m.d[self.wdomain] += [
            rgray_sync0.eq(rgray),
            rgray_sync1.eq(rgray_sync0),
        ]
        with m.If(do_write):
            m.d[self.wdomain] += [
                wbin.eq(next_wbin),
                wgray.eq(next_wgray),
            ]

        # ── read domain ───────────────────────────────────────────────
        next_rbin = Signal(aw + 1)
        next_rgray = Signal(aw + 1)
        do_read = Signal()

        m.d.comb += [
            next_rbin.eq(rbin + 1),
            next_rgray.eq(next_rbin ^ (next_rbin >> 1)),
            self.r_empty.eq(rgray == wgray_sync1),
            do_read.eq(self.r_en & ~self.r_empty),
            # The synchronous read port latches mem[rbin] on the accepting
            # edge, so its output lines up with the registered r_valid below.
            rp.addr.eq(rbin[:aw]),
            rp.en.eq(do_read),
            self.rdata.eq(rp.data),
        ]
        m.d[self.rdomain] += [
            wgray_sync0.eq(wgray),
            wgray_sync1.eq(wgray_sync0),
            self.r_valid.eq(do_read),
        ]
        with m.If(do_read):
            m.d[self.rdomain] += [
                rbin.eq(next_rbin),
                rgray.eq(next_rgray),
            ]

        return m
|
||||
|
||||
|
||||
def _sim_fifo():
    """Simulate a 1-bit, 16-deep AsyncFIFO with a writer on "src" and a reader on "dst".

    The writer pushes 32 alternating bits with 0-2 idle cycles between them;
    the reader polls for 200 iterations and prints every value it pops.
    """
    top = Module()
    fifo = AsyncFIFO(width=1, depth=16, wdomain="src", rdomain="dst")
    top.submodules.fifo = fifo

    sim = Simulator(top)
    sim.add_clock(1e-6, domain="src")
    sim.add_clock(1.7e-6, domain="dst")

    def writer():
        # Alternating 0/1 pattern, one write strobe per item.
        for index in range(32):
            yield fifo.wdata.eq(index & 1)
            yield fifo.w_en.eq(1)
            yield
            yield fifo.w_en.eq(0)
            # Vary the gap between writes: 0, 1 or 2 idle cycles.
            gap = index % 3
            for _ in range(gap):
                yield

    def reader():
        seen = []
        for _ in range(200):
            is_empty = yield fifo.r_empty
            if is_empty:
                # Nothing to pop yet; wait one cycle.
                yield
                continue
            # Strobe a read for one cycle, then look at the result.
            yield fifo.r_en.eq(1)
            yield
            yield fifo.r_en.eq(0)
            got_data = yield fifo.r_valid
            if got_data:
                d = yield fifo.rdata
                seen.append(d)
                print(f"read: {d}")
        print(f"total read: {len(seen)}")

    sim.add_sync_process(writer, domain="src")
    sim.add_sync_process(reader, domain="dst")
    sim.run()


if __name__ == "__main__":
    _sim_fifo()
|
||||
@@ -0,0 +1,119 @@
|
||||
"""IceBreaker (iCE40 UP5K) vendor-backed async FIFO example.
|
||||
|
||||
This module uses Amaranth's `Memory` with separate write/read ports in different
|
||||
clock domains. With the icestorm toolchain the memory typically maps to
|
||||
`SB_RAM40_4K` block RAMs. The control (full/empty) is implemented with
|
||||
gray-pointer logic and two-stage synchronization of opposing pointers.
|
||||
|
||||
Notes:
|
||||
- This prefers block RAM for storage (small LUT usage, lower power).
|
||||
- The write/read ports are in independent domains; backend maps ports to
|
||||
dual-port RAM primitives when available.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
|
||||
|
||||
class Ice40AsyncFIFO(Elaboratable):
    """1-bit-wide gray-pointer dual-clock FIFO backed by an Amaranth `Memory`.

    Parameters:
        depth:   number of entries; must be a power of two (and at least 1).
        wdomain: write (source) clock domain name.
        rdomain: read (destination) clock domain name.

    Write side (`wdomain`):
        wdata:  bit to store.
        w_en:   request a write this cycle; ignored while `w_full` is high.
        w_full: high when no free slot is visible to the write side.

    Read side (`rdomain`):
        r_en:    request a read this cycle; ignored while `r_empty` is high.
        r_empty: high when no stored entry is visible to the read side.
        r_valid: high for one cycle, the cycle after an accepted read.
        rdata:   the bit fetched by the most recent accepted read; valid
                 together with `r_valid`.

    Each side sees the opposite pointer through a two-flop synchronizer, so
    `w_full` and `r_empty` may stay asserted slightly longer than strictly
    necessary, but never deassert early.
    """

    def __init__(self, depth=256, wdomain="src", rdomain="dst"):
        # `0 & -1 == 0`, so zero must be rejected explicitly.
        assert depth > 0 and depth & (depth - 1) == 0, "depth must be power of two"
        self.depth = depth
        self.aw = (depth - 1).bit_length()
        self.wdomain = wdomain
        self.rdomain = rdomain

        # serial (1-bit) interface
        self.wdata = Signal()
        self.w_en = Signal()
        self.w_full = Signal()

        self.rdata = Signal()
        self.r_en = Signal()
        self.r_valid = Signal()
        self.r_empty = Signal()

    def elaborate(self, platform):
        m = Module()
        aw = self.aw

        # single-bit-wide memory; mapping to block RAM is left to the backend
        mem = Memory(width=1, depth=self.depth)
        wp = mem.write_port(domain=self.wdomain)
        rp = mem.read_port(domain=self.rdomain, transparent=False)
        m.submodules += [wp, rp]

        # pointers (aw+1 bits to include the wrap bit)
        wbin = Signal(aw + 1)
        wgray = Signal(aw + 1)
        rbin = Signal(aw + 1)
        rgray = Signal(aw + 1)

        # sync registers for opposing pointers (two-stage)
        rgray_sync0 = Signal(aw + 1)
        rgray_sync1 = Signal(aw + 1)
        wgray_sync0 = Signal(aw + 1)
        wgray_sync1 = Signal(aw + 1)

        # Full means the binary pointers differ by exactly `depth`; in gray
        # code that is "top two bits inverted, the rest equal" (a single
        # inverted bit when the pointer is only one bit wide).
        full_mask = (0b11 << (aw - 1)) if aw >= 1 else 0b1

        # ── write side ────────────────────────────────────────────────
        next_wbin = Signal(aw + 1)
        next_wgray = Signal(aw + 1)
        do_write = Signal()

        m.d.comb += [
            next_wbin.eq(wbin + 1),
            next_wgray.eq(next_wbin ^ (next_wbin >> 1)),
            # Judge fullness on the current pointer so every slot is usable.
            self.w_full.eq(wgray == (rgray_sync1 ^ full_mask)),
            do_write.eq(self.w_en & ~self.w_full),
            # Combinational port drive: the store lands on the same edge that
            # publishes the advanced pointer.
            # Python slices are [low:high), so the address is wbin[:aw].
            wp.addr.eq(wbin[:aw]),
            wp.data.eq(self.wdata),
            wp.en.eq(do_write),
        ]
        m.d[self.wdomain] += [
            rgray_sync0.eq(rgray),
            rgray_sync1.eq(rgray_sync0),
        ]
        with m.If(do_write):
            m.d[self.wdomain] += [
                wbin.eq(next_wbin),
                wgray.eq(next_wgray),
            ]

        # ── read side ─────────────────────────────────────────────────
        next_rbin = Signal(aw + 1)
        next_rgray = Signal(aw + 1)
        do_read = Signal()

        m.d.comb += [
            next_rbin.eq(rbin + 1),
            next_rgray.eq(next_rbin ^ (next_rbin >> 1)),
            self.r_empty.eq(rgray == wgray_sync1),
            do_read.eq(self.r_en & ~self.r_empty),
            # The synchronous read port latches mem[rbin] on the accepting
            # edge, so its output lines up with the registered r_valid below.
            rp.addr.eq(rbin[:aw]),
            rp.en.eq(do_read),
            self.rdata.eq(rp.data),
        ]
        m.d[self.rdomain] += [
            wgray_sync0.eq(wgray),
            wgray_sync1.eq(wgray_sync0),
            self.r_valid.eq(do_read),
        ]
        with m.If(do_read):
            m.d[self.rdomain] += [
                rbin.eq(next_rbin),
                rgray.eq(next_rgray),
            ]

        return m
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke check: elaborate a 256-deep FIFO and dump the generated Verilog.
    from amaranth.back import verilog

    fifo = Ice40AsyncFIFO(depth=256)
    port_list = [
        fifo.wdata,
        fifo.w_en,
        fifo.w_full,
        fifo.rdata,
        fifo.r_en,
        fifo.r_valid,
        fifo.r_empty,
    ]
    print(verilog.convert(fifo, ports=port_list))
|
||||
@@ -0,0 +1,617 @@
|
||||
"""BBA register file — EXI domain.
|
||||
|
||||
Decodes EXI transactions (2-byte header + N data bytes), reads/writes the BBA
|
||||
register space, and owns all AsyncFIFO / PulseSynchronizer CDC primitives.
|
||||
|
||||
Transaction header format
|
||||
--------------------------
|
||||
Byte 0 [7] write_flag
|
||||
[6:0] addr[12:6]
|
||||
Byte 1 [7:2] addr[5:0]
|
||||
[1:0] xfer_len−1 (0=1B, 1=2B, 2=3B, 3=4B)
|
||||
|
||||
Addresses 0x0000–0x00FF : register file (sparse individual Signals, exi domain).
|
||||
Addresses 0x0100–0x1FFF : SPRAM ring buffer (sync domain, prefetch FIFOs).
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
from amaranth.lib.cdc import PulseSynchronizer
|
||||
from amaranth.lib.fifo import AsyncFIFO
|
||||
|
||||
__all__ = ["BBARegisterFile"]
|
||||
|
||||
# Register addresses
|
||||
_NCRA = 0x00
|
||||
_IMR = 0x08
|
||||
_IR = 0x09
|
||||
_RWP_LO = 0x16
|
||||
_RWP_HI = 0x17
|
||||
_RRP_LO = 0x18
|
||||
_RRP_HI = 0x19
|
||||
_PAR0 = 0x20
|
||||
_PAR1 = 0x21
|
||||
_PAR2 = 0x22
|
||||
_PAR3 = 0x23
|
||||
_PAR4 = 0x24
|
||||
_PAR5 = 0x25
|
||||
_NWAYS = 0x31
|
||||
_HIPR = 0x3A
|
||||
_TWD_LO = 0x34
|
||||
_TWD_HI = 0x35
|
||||
_TXDATA = 0x48
|
||||
|
||||
# Read-only hardcoded values
|
||||
_NWAYS_VAL = 0x17
|
||||
_HIPR_VAL = 0x01
|
||||
|
||||
# Device ID returned on first 4-byte read of addr 0x0000
|
||||
_DEVICE_ID = [0x04, 0x02, 0x02, 0x00]
|
||||
|
||||
|
||||
class BBARegisterFile(Elaboratable):
|
||||
"""EXI transaction decoder and BBA register file with CDC bridges.
|
||||
|
||||
Sync-domain FIFO/pulse ports are wired by BBATop to the sync-domain modules.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
# ── EXI byte-stream interface (exi domain, from/to ExiCapture) ────
|
||||
# RX: received bytes (header + write data + read dummies) — FWFT read
|
||||
# side of ExiCapture's rx_fifo.
|
||||
self.rx_data = Signal(8)
|
||||
self.rx_rdy = Signal()
|
||||
self.rx_en = Signal()
|
||||
# TX: response bytes pushed proactively into ExiCapture's tx_fifo.
|
||||
self.tx_data = Signal(8)
|
||||
self.tx_en = Signal()
|
||||
self.tx_rdy = Signal()
|
||||
|
||||
# High while an EXI transaction is in progress (from ExiCapture).
|
||||
# SPRAM reads stream until this deasserts → supports variable-length
|
||||
# (DMA) bulk reads, not just ≤4-byte immediate transfers.
|
||||
self.cs_active = Signal()
|
||||
|
||||
# ── Interrupt (exi domain) ────────────────────────────────────────
|
||||
self.exi_int_n = Signal(init=1)
|
||||
|
||||
# ── PAR output (for forwarding to W5500 as source MAC) ───────────
|
||||
self.par = Signal(48) # PAR0-5 packed: PAR0 in low byte par[0:8]
|
||||
|
||||
# NCRA[3] = SR (start receive) bit — gates the RX ring-buffer path.
|
||||
self.ncra_sr = Signal()
|
||||
|
||||
# ── CDC FIFO sync-domain sides (wired by BBATop) ──────────────────
|
||||
# SPRAM request exi→sync: sync reads these
|
||||
self.spram_req_r_data = Signal(16)
|
||||
self.spram_req_r_en = Signal()
|
||||
self.spram_req_r_rdy = Signal()
|
||||
|
||||
# SPRAM response sync→exi: sync writes these
|
||||
self.spram_rsp_w_data = Signal(8)
|
||||
self.spram_rsp_w_en = Signal()
|
||||
self.spram_rsp_w_rdy = Signal()
|
||||
|
||||
# TX bytes exi→sync: sync reads these
|
||||
self.tx_bytes_r_data = Signal(8)
|
||||
self.tx_bytes_r_en = Signal()
|
||||
self.tx_bytes_r_rdy = Signal()
|
||||
|
||||
# TX ctrl (frame length) exi→sync: sync reads these
|
||||
self.tx_ctrl_r_data = Signal(16)
|
||||
self.tx_ctrl_r_en = Signal()
|
||||
self.tx_ctrl_r_rdy = Signal()
|
||||
|
||||
# RX write-pointer update sync→exi: sync writes these
|
||||
self.rx_wptr_w_data = Signal(8)
|
||||
self.rx_wptr_w_en = Signal()
|
||||
self.rx_wptr_w_rdy = Signal()
|
||||
|
||||
# RX read-pointer update exi→sync: sync reads these
|
||||
self.rx_rptr_r_data = Signal(8)
|
||||
self.rx_rptr_r_en = Signal()
|
||||
self.rx_rptr_r_rdy = Signal()
|
||||
|
||||
# PulseSynchronizer ports (exi↔sync)
|
||||
self.ncra_rst_o = Signal() # exi→sync
|
||||
self.rx_irq_i = Signal() # sync→exi
|
||||
self.tx_irq_i = Signal() # sync→exi
|
||||
|
||||
def elaborate(self, platform):
    """Build the register file, its CDC primitives and the EXI transaction FSM.

    Everything assigned through ``m.d.exi`` is clocked by the "exi" domain.
    The sync-domain sides of the AsyncFIFOs / PulseSynchronizers are only
    forwarded combinationally to and from the ``self.*`` ports.

    Header format (as decoded in HEADER0/HEADER1):
      byte 0 : bit 7 = write flag, bits [6:0] = addr[12:6]
      byte 1 : bits [7:2] = addr[5:0], bits [1:0] = transfer length - 1

    FSM states:
      HEADER0      pop header byte 0.
      HEADER1      pop header byte 1, latch address/length, dispatch.
      WRITE        pop xfer_len+1 data bytes into the register file.
      REG_READ     push xfer_len+1 response bytes (addr < 0x100).
      READ_DRAIN   pop the xfer_len+1 dummy bytes of a register read.
      SPRAM_STREAM addr >= 0x100: prefetch SPRAM bytes and push them to
                   the tx side for as long as ``cs_active`` is high.
      SPRAM_END    drain outstanding SPRAM responses and rx bytes, then
                   return to HEADER0.

    Statement order matters: Amaranth applies the LAST active assignment,
    so the default strobes come first, the FSM overrides them, and the
    interrupt / pointer updates at the bottom override the FSM.

    ``platform`` is not used by this module.
    """
    m = Module()

    # ── CDC FIFOs ────────────────────────────────────────────────────
    spram_req = AsyncFIFO(width=16, depth=4,  w_domain="exi",  r_domain="sync")
    spram_rsp = AsyncFIFO(width=8,  depth=4,  w_domain="sync", r_domain="exi")
    tx_bytes  = AsyncFIFO(width=8,  depth=16, w_domain="exi",  r_domain="sync")
    tx_ctrl   = AsyncFIFO(width=16, depth=4,  w_domain="exi",  r_domain="sync")
    rx_wptr   = AsyncFIFO(width=8,  depth=4,  w_domain="sync", r_domain="exi")
    rx_rptr   = AsyncFIFO(width=8,  depth=4,  w_domain="exi",  r_domain="sync")

    m.submodules.spram_req = spram_req
    m.submodules.spram_rsp = spram_rsp
    m.submodules.tx_bytes  = tx_bytes
    m.submodules.tx_ctrl   = tx_ctrl
    m.submodules.rx_wptr   = rx_wptr
    m.submodules.rx_rptr   = rx_rptr

    # Expose sync-domain FIFO sides
    m.d.comb += [
        self.spram_req_r_data.eq(spram_req.r_data),
        spram_req.r_en.eq(self.spram_req_r_en),
        self.spram_req_r_rdy.eq(spram_req.r_rdy),

        spram_rsp.w_data.eq(self.spram_rsp_w_data),
        spram_rsp.w_en.eq(self.spram_rsp_w_en),
        self.spram_rsp_w_rdy.eq(spram_rsp.w_rdy),

        self.tx_bytes_r_data.eq(tx_bytes.r_data),
        tx_bytes.r_en.eq(self.tx_bytes_r_en),
        self.tx_bytes_r_rdy.eq(tx_bytes.r_rdy),

        self.tx_ctrl_r_data.eq(tx_ctrl.r_data),
        tx_ctrl.r_en.eq(self.tx_ctrl_r_en),
        self.tx_ctrl_r_rdy.eq(tx_ctrl.r_rdy),

        rx_wptr.w_data.eq(self.rx_wptr_w_data),
        rx_wptr.w_en.eq(self.rx_wptr_w_en),
        self.rx_wptr_w_rdy.eq(rx_wptr.w_rdy),

        self.rx_rptr_r_data.eq(rx_rptr.r_data),
        rx_rptr.r_en.eq(self.rx_rptr_r_en),
        self.rx_rptr_r_rdy.eq(rx_rptr.r_rdy),
    ]

    # ── PulseSynchronizers ───────────────────────────────────────────
    ncra_rst_ps = PulseSynchronizer(i_domain="exi",  o_domain="sync")
    rx_irq_ps   = PulseSynchronizer(i_domain="sync", o_domain="exi")
    tx_irq_ps   = PulseSynchronizer(i_domain="sync", o_domain="exi")

    m.submodules.ncra_rst_ps = ncra_rst_ps
    m.submodules.rx_irq_ps   = rx_irq_ps
    m.submodules.tx_irq_ps   = tx_irq_ps

    m.d.comb += [
        self.ncra_rst_o.eq(ncra_rst_ps.o),
        rx_irq_ps.i.eq(self.rx_irq_i),
        tx_irq_ps.i.eq(self.tx_irq_i),
    ]

    # ── Register file (sparse individual Signals, exi domain) ────────
    # Only the registers actually read/written by the GC or sync domain.
    # Writes to unknown addresses are silently ignored; reads return 0.
    r_ncra   = Signal(8)
    r_imr    = Signal(8)
    r_ir     = Signal(8)
    r_rwp_lo = Signal(8)
    r_rrp_lo = Signal(8)
    # PAR0–5 reset to a valid Nintendo OUI MAC (00:09:BF:00:00:01) so the
    # device has a sane source MAC even before the GC driver programs its
    # own. PAR0 is the first MAC octet.
    _par_reset = [0x00, 0x09, 0xBF, 0x00, 0x00, 0x01]
    r_par = Array([Signal(8, name=f"par{i}", init=_par_reset[i])
                   for i in range(6)])
    r_twd_lo = Signal(8)
    r_twd_hi = Signal(8)

    # PAR packed output: PAR0 in the LOW byte (par[0:8]). The W5500 master
    # reads mac_shadow[i] = par[i*8:(i+1)*8], so this puts PAR0 first in the
    # SHAR write — i.e. PAR0 is the first MAC octet on the wire.
    m.d.comb += self.par.eq(Cat(
        r_par[0], r_par[1], r_par[2], r_par[3], r_par[4], r_par[5],
    ))
    m.d.comb += self.ncra_sr.eq(r_ncra[3])  # start-receive bit

    # ── Transaction state ────────────────────────────────────────────
    hdr0         = Signal(8)
    addr         = Signal(13)
    is_write     = Signal()
    xfer_len     = Signal(2)   # 0=1B … 3=4B
    byte_ctr     = Signal(2)
    tx_frame_len = Signal(16)

    # True until first NCRA reset write: return device ID on addr=0 reads
    id_phase = Signal(init=1)

    # Counts the dummy bytes discarded in READ_DRAIN after a register read.
    drain_ctr = Signal(2)

    # SPRAM streaming-read state (DMA / variable-length reads):
    #   sp_addr     — next SPRAM byte address to request (auto-increments)
    #   outstanding — SPRAM requests issued but whose responses are not yet
    #                 popped (bounds prefetch and is drained at end)
    sp_addr     = Signal(13)
    outstanding = Signal(4)
    SP_LIMIT    = 4            # max prefetch depth in flight

    # Effective address of the current data byte — a REGISTERED running
    # pointer (set to the base in HEADER1, incremented per byte). Keeping
    # it registered keeps the 13-bit adder off the combinational path that
    # feeds the read-response mux → tx_fifo write data.
    eff_addr = Signal(13)
    rd_sel   = eff_addr[0:8]

    # ── Combinational read-response value (non-SPRAM) ────────────────
    reg_rdval = Signal(8)
    with m.Switch(rd_sel):
        with m.Case(_NCRA):   m.d.comb += reg_rdval.eq(r_ncra)
        with m.Case(_IMR):    m.d.comb += reg_rdval.eq(r_imr)
        with m.Case(_IR):     m.d.comb += reg_rdval.eq(r_ir)
        with m.Case(_RWP_LO): m.d.comb += reg_rdval.eq(r_rwp_lo)
        with m.Case(_RRP_LO): m.d.comb += reg_rdval.eq(r_rrp_lo)
        with m.Case(_PAR0, _PAR1, _PAR2, _PAR3, _PAR4, _PAR5):
            # The low three address bits select the PAR byte.
            m.d.comb += reg_rdval.eq(r_par[eff_addr[0:3]])
        with m.Case(_TWD_LO): m.d.comb += reg_rdval.eq(r_twd_lo)
        with m.Case(_TWD_HI): m.d.comb += reg_rdval.eq(r_twd_hi)
        with m.Case(_NWAYS):  m.d.comb += reg_rdval.eq(_NWAYS_VAL)
        with m.Case(_HIPR):   m.d.comb += reg_rdval.eq(_HIPR_VAL)
        with m.Default():     m.d.comb += reg_rdval.eq(0)

    # Device-ID bytes (addr 0 read while id_phase): 0x04 0x02 0x02 0x00
    devid = Signal(8)
    with m.Switch(byte_ctr):
        with m.Case(0): m.d.comb += devid.eq(0x04)
        with m.Case(1): m.d.comb += devid.eq(0x02)
        with m.Case(2): m.d.comb += devid.eq(0x02)
        with m.Case(3): m.d.comb += devid.eq(0x00)

    rd_val = Signal(8)  # response for the current non-SPRAM read byte
    with m.If((addr == 0) & id_phase):
        m.d.comb += rd_val.eq(devid)
    with m.Else():
        m.d.comb += rd_val.eq(reg_rdval)

    # ── Default strobes ──────────────────────────────────────────────
    # Registered strobes default to 0 every cycle; the FSM and the
    # pointer/IRQ logic below override them for the cycles they fire.
    m.d.exi += [
        spram_req.w_en.eq(0),
        tx_bytes.w_en.eq(0),
        tx_ctrl.w_en.eq(0),
        rx_rptr.w_en.eq(0),
        rx_wptr.r_en.eq(0),
        ncra_rst_ps.i.eq(0),
    ]
    m.d.comb += [
        self.rx_en.eq(0),
        self.tx_en.eq(0),
        self.tx_data.eq(0),
        # Combinational so the FIFO advances in the SAME cycle as the pop —
        # a registered r_en would let `pop` re-fire on the same byte.
        spram_rsp.r_en.eq(0),
    ]

    # ── Transaction FSM (proactive push/pull over byte FIFOs) ────────
    # The SPI bit cadence lives in the capture domain; here we just consume
    # received bytes and, for reads, push response bytes into tx_fifo during
    # the EXI clock-idle gap before the GC clocks the data phase.
    with m.FSM(domain="exi", name="exi_fsm"):

        with m.State("HEADER0"):
            with m.If(self.rx_rdy):
                m.d.comb += self.rx_en.eq(1)
                m.d.exi += hdr0.eq(self.rx_data)
                m.next = "HEADER1"

        with m.State("HEADER1"):
            with m.If(self.rx_rdy):
                m.d.comb += self.rx_en.eq(1)
                new_addr  = Cat(self.rx_data[2:8], hdr0[0:7])  # 13-bit addr
                new_len   = self.rx_data[0:2]
                new_write = hdr0[7]

                m.d.exi += addr.eq(new_addr)
                m.d.exi += eff_addr.eq(new_addr)  # running pointer init
                m.d.exi += xfer_len.eq(new_len)
                m.d.exi += is_write.eq(new_write)
                m.d.exi += byte_ctr.eq(0)
                m.d.exi += drain_ctr.eq(0)

                with m.If(new_write):
                    m.next = "WRITE"
                with m.Elif(new_addr >= 0x100):
                    # SPRAM region: stream until CS deasserts (DMA-capable).
                    m.d.exi += sp_addr.eq(new_addr)
                    m.d.exi += outstanding.eq(0)
                    m.next = "SPRAM_STREAM"
                with m.Else():
                    m.next = "REG_READ"

        with m.State("WRITE"):
            # Consume xfer_len+1 data bytes, writing the register file.
            with m.If(self.rx_rdy):
                m.d.comb += self.rx_en.eq(1)
                with m.Switch(rd_sel):
                    with m.Case(_NCRA):
                        m.d.exi += r_ncra.eq(self.rx_data)
                        with m.If(self.rx_data[0]):
                            m.d.exi += r_ncra[0].eq(0)  # RESET self-clears
                            m.d.exi += ncra_rst_ps.i.eq(1)
                            m.d.exi += id_phase.eq(0)
                        # Either ST bit set: hand the frame length to the
                        # sync side. Skipped if tx_ctrl has no room.
                        with m.If(self.rx_data[1:3].any()):
                            with m.If(tx_ctrl.w_rdy):
                                m.d.exi += tx_ctrl.w_data.eq(tx_frame_len)
                                m.d.exi += tx_ctrl.w_en.eq(1)
                    with m.Case(_IMR):
                        m.d.exi += r_imr.eq(self.rx_data)
                    with m.Case(_IR):
                        m.d.exi += r_ir.eq(r_ir & ~self.rx_data)  # write-1-clear
                    with m.Case(_RRP_LO):
                        m.d.exi += r_rrp_lo.eq(self.rx_data)
                        with m.If(rx_rptr.w_rdy):
                            m.d.exi += rx_rptr.w_data.eq(self.rx_data)
                            m.d.exi += rx_rptr.w_en.eq(1)
                    with m.Case(_PAR0, _PAR1, _PAR2, _PAR3, _PAR4, _PAR5):
                        m.d.exi += r_par[eff_addr[0:3]].eq(self.rx_data)
                    with m.Case(_TWD_LO):
                        m.d.exi += r_twd_lo.eq(self.rx_data)
                        m.d.exi += tx_frame_len[0:8].eq(self.rx_data)
                    with m.Case(_TWD_HI):
                        m.d.exi += r_twd_hi.eq(self.rx_data)
                        m.d.exi += tx_frame_len[8:16].eq(self.rx_data)
                    with m.Case(_TXDATA):
                        # The byte is dropped if tx_bytes has no room.
                        with m.If(tx_bytes.w_rdy):
                            m.d.exi += tx_bytes.w_data.eq(self.rx_data)
                            m.d.exi += tx_bytes.w_en.eq(1)
                    # All other addresses silently ignored

                with m.If(byte_ctr == xfer_len):
                    m.next = "HEADER0"
                with m.Else():
                    m.d.exi += byte_ctr.eq(byte_ctr + 1)
                    m.d.exi += eff_addr.eq(eff_addr + 1)

        with m.State("REG_READ"):
            # Register / device-ID read (addr < 0x100): value available
            # immediately, bounded by the header's xfer_len (≤4 bytes).
            with m.If(self.tx_rdy):
                m.d.comb += self.tx_data.eq(rd_val)
                m.d.comb += self.tx_en.eq(1)
                with m.If(byte_ctr == xfer_len):
                    m.next = "READ_DRAIN"
                with m.Else():
                    m.d.exi += byte_ctr.eq(byte_ctr + 1)
                    m.d.exi += eff_addr.eq(eff_addr + 1)

        with m.State("READ_DRAIN"):
            # Discard the xfer_len+1 dummy bytes the GC clocks while reading.
            with m.If(self.rx_rdy):
                m.d.comb += self.rx_en.eq(1)
                with m.If(drain_ctr == xfer_len):
                    m.next = "HEADER0"
                with m.Else():
                    m.d.exi += drain_ctr.eq(drain_ctr + 1)

        with m.State("SPRAM_STREAM"):
            # Stream SPRAM bytes until CS deasserts — handles both ≤4-byte
            # immediate reads and arbitrary-length DMA reads uniformly.
            # Issue read requests ahead (prefetch, bounded by SP_LIMIT) and
            # push responses into tx_fifo; the capture domain pops them as
            # the GC clocks. Drain rx dummies as they arrive.
            issue = Signal()
            pop   = Signal()
            m.d.comb += issue.eq(self.cs_active & spram_req.w_rdy
                                 & (outstanding < SP_LIMIT))
            m.d.comb += pop.eq(spram_rsp.r_rdy & self.tx_rdy)

            with m.If(issue):
                m.d.exi += spram_req.w_data.eq(sp_addr)
                m.d.exi += spram_req.w_en.eq(1)
                m.d.exi += sp_addr.eq(sp_addr + 1)
            with m.If(pop):
                m.d.comb += self.tx_data.eq(spram_rsp.r_data)
                m.d.comb += self.tx_en.eq(1)
                m.d.comb += spram_rsp.r_en.eq(1)
            # One update covers issue-only, pop-only and both-at-once.
            m.d.exi += outstanding.eq(outstanding + issue - pop)

            with m.If(self.rx_rdy):
                m.d.comb += self.rx_en.eq(1)  # drain dummy bytes

            with m.If(~self.cs_active):
                m.next = "SPRAM_END"

        with m.State("SPRAM_END"):
            # CS deasserted: drain in-flight SPRAM responses and rx dummies,
            # then idle. Leftover prefetch in tx_fifo is flushed by
            # ExiCapture on the next CS assertion.
            with m.If(spram_rsp.r_rdy):
                m.d.comb += spram_rsp.r_en.eq(1)
                m.d.exi += outstanding.eq(outstanding - 1)
            with m.If(self.rx_rdy):
                m.d.comb += self.rx_en.eq(1)
            with m.If((outstanding == 0) & ~self.rx_rdy & ~spram_rsp.r_rdy):
                m.next = "HEADER0"

    # ── Interrupt output ─────────────────────────────────────────────
    # Registered, active low: asserted while any unmasked IR bit is set.
    m.d.exi += self.exi_int_n.eq(~(r_ir & r_imr).any())

    # ── Consume RWP updates from sync domain ──────────────────────────
    with m.If(rx_wptr.r_rdy):
        m.d.exi += rx_wptr.r_en.eq(1)
        m.d.exi += r_rwp_lo.eq(rx_wptr.r_data)

    # ── PulseSynchronizer arrivals ────────────────────────────────────
    # Placed after the FSM so an arriving IRQ wins over a same-cycle
    # register write from the GC (last assignment takes effect).
    with m.If(rx_irq_ps.o):
        m.d.exi += r_ir[1].eq(1)        # RI bit
    with m.If(tx_irq_ps.o):
        m.d.exi += r_ir[2].eq(1)        # TI bit
        m.d.exi += r_ncra[1:3].eq(0)    # clear ST bits

    return m
|
||||
|
||||
|
||||
# ── Testbench ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Stand-alone testbench for BBARegisterFile. It drives the byte-stream
    # interface directly and models the SPRAM on the sync side; any failed
    # check is collected in `errors` and reported at the end (exit code 1).
    import sys, os
    sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))

    from amaranth.sim import Simulator, Period

    reg = BBARegisterFile()

    def hex_list(values):
        """Format a list of byte values as ['0xNN', ...] for log output."""
        return [f'0x{b:02X}' for b in values]

    def first_hex(values):
        """Format the first byte of a read result, tolerating an empty result.

        collect_tx() returns fewer bytes than requested when it times out;
        indexing [0] unconditionally would raise IndexError and hide the
        failure that was already recorded in `errors`.
        """
        return f"0x{values[0]:02X}" if values else "<no data>"

    def len_field(nbytes):
        """Encode an immediate transfer size (1..4 bytes) as the 2-bit header field.

        Anything outside 1..4 would spill into the address bits of header
        byte 1 (or go negative for an empty write), so reject it up front.
        """
        if not 1 <= nbytes <= 4:
            raise ValueError(
                f"EXI immediate transfer must be 1..4 bytes, got {nbytes}")
        return (nbytes - 1) & 0x3

    # Drive the byte-stream interface directly (the SPI bit cadence and FIFOs
    # live in ExiCapture; here we model the byte producer/consumer).
    async def push_rx(ctx, b):
        """Present one received byte and wait for the register file to pop it."""
        ctx.set(reg.rx_data, b)
        ctx.set(reg.rx_rdy, 1)
        while True:
            # Sample rx_en BEFORE the clock edge: the pop takes effect on
            # the edge at which rx_en was high.
            en = ctx.get(reg.rx_en)
            await ctx.tick("exi")
            if en:
                break
        ctx.set(reg.rx_rdy, 0)

    async def collect_tx(ctx, n, max_ticks=3000):
        """Collect up to n response bytes pushed by the register file.

        Gives up after max_ticks exi cycles and returns whatever was
        collected so far (possibly fewer than n bytes). Returns without
        ticking once the n-th byte has been seen.
        """
        out = []
        for _ in range(max_ticks):
            if ctx.get(reg.tx_en):
                out.append(ctx.get(reg.tx_data))
                if len(out) >= n:
                    break
            await ctx.tick("exi")
        return out

    async def exi_read(ctx, addr, length=1):
        """Immediate register read of 1..4 bytes; returns the response bytes."""
        hdr0 = (addr >> 6) & 0x7F
        hdr1 = ((addr & 0x3F) << 2) | len_field(length)
        await push_rx(ctx, hdr0)
        await push_rx(ctx, hdr1)
        result = await collect_tx(ctx, length)  # READ pushes `length` bytes
        for _ in range(length):                 # READ_DRAIN dummies
            await push_rx(ctx, 0x00)
        return result

    async def exi_write(ctx, addr, data):
        """Immediate register write of 1..4 bytes."""
        hdr0 = 0x80 | ((addr >> 6) & 0x7F)
        hdr1 = ((addr & 0x3F) << 2) | len_field(len(data))
        await push_rx(ctx, hdr0)
        await push_rx(ctx, hdr1)
        for b in data:
            await push_rx(ctx, b)

    # SPRAM contents the streaming-read test reads back (byte i = 0xA0+i).
    spram_mem = {0x100 + i: (0xA0 + i) & 0xFF for i in range(64)}

    async def spram_model(ctx):
        """Model the SPRAM (sync side): answer spram_req with mem[addr].

        One request at a time, with cleanly-pulsed r_en/w_en so the FIFO pop
        and the response push stay in lock-step (no double-response races).
        """
        state = "POP"
        held = 0
        async for vals in ctx.tick("sync").sample(
                reg.spram_req_r_rdy, reg.spram_req_r_data, reg.spram_rsp_w_rdy):
            # The sampled signal values are the last three items.
            rdy, addr, rsp_rdy = vals[-3:]
            ctx.set(reg.spram_req_r_en, 0)
            ctx.set(reg.spram_rsp_w_en, 0)
            if state == "POP":
                if rdy:
                    held = spram_mem.get(addr, 0)
                    ctx.set(reg.spram_req_r_en, 1)    # consume the request
                    state = "RESP"
            else:  # RESP
                if rsp_rdy:
                    ctx.set(reg.spram_rsp_w_data, held)
                    ctx.set(reg.spram_rsp_w_en, 1)    # deliver the response
                    state = "POP"

    errors = []

    async def testbench(ctx):
        ctx.set(reg.tx_rdy, 1)  # tx_fifo always has room in this model
        await ctx.tick("exi").repeat(8)

        # T1: Device ID (addr=0, 4-byte read)
        result = await exi_read(ctx, 0x0000, length=4)
        if result != _DEVICE_ID:
            errors.append(f"T1 device ID: expected {_DEVICE_ID}, got {result}")
        print(f"T1 device ID: {hex_list(result)}")
        await ctx.tick("exi").repeat(4)

        # T2: Write and read back PAR0-PAR3
        await exi_write(ctx, _PAR0, [0xDE, 0xAD, 0xBE, 0xEF])
        await ctx.tick("exi").repeat(4)
        result = await exi_read(ctx, _PAR0, length=4)
        if result != [0xDE, 0xAD, 0xBE, 0xEF]:
            errors.append(f"T2 PAR readback: {result}")
        print(f"T2 PAR0-3: {hex_list(result)}")
        await ctx.tick("exi").repeat(4)

        # T3: NWAYS hardcoded 0x17
        result = await exi_read(ctx, _NWAYS, length=1)
        if result != [0x17]:
            errors.append(f"T3 NWAYS: expected 0x17, got {result}")
        print(f"T3 NWAYS: {first_hex(result)}")
        await ctx.tick("exi").repeat(4)

        # T4: HIPR hardcoded 0x01
        result = await exi_read(ctx, _HIPR, length=1)
        if result != [0x01]:
            errors.append(f"T4 HIPR: expected 0x01, got {result}")
        print(f"T4 HIPR: {first_hex(result)}")
        await ctx.tick("exi").repeat(4)

        # T5: IMR write, rx_irq pulse, INT_N asserts, then IR clear
        await exi_write(ctx, _IMR, [0x02])   # enable RI (bit 1)
        await ctx.tick("exi").repeat(4)
        ctx.set(reg.rx_irq_i, 1)
        await ctx.tick("sync").repeat(1)
        ctx.set(reg.rx_irq_i, 0)
        await ctx.tick("exi").repeat(12)     # wait for PS propagation
        int_n = ctx.get(reg.exi_int_n)
        if int_n != 0:
            errors.append(f"T5 INT_N after RI: expected 0, got {int_n}")
        print(f"T5 INT_N after RI pulse: {int_n} (want 0)")
        await exi_write(ctx, _IR, [0x02])    # write-1-to-clear RI
        await ctx.tick("exi").repeat(4)
        int_n = ctx.get(reg.exi_int_n)
        if int_n != 1:
            errors.append(f"T5 INT_N after clear: expected 1, got {int_n}")
        print(f"T5 INT_N after IR clear: {int_n} (want 1)")

        # T6: streaming SPRAM read (DMA) — read N>4 bytes from 0x100 by holding
        # cs_active and clocking past the header's 4-byte length field.
        N = 12
        ctx.set(reg.cs_active, 1)
        await push_rx(ctx, 0x04)  # hdr0 → addr[12:6]; addr 0x100, read
        await push_rx(ctx, 0x00)  # hdr1 → addr[5:0]=0, len field ignored
        got = await collect_tx(ctx, N, max_ticks=5000)
        ctx.set(reg.cs_active, 0)            # end the transaction
        await ctx.tick("exi").repeat(40)     # let SPRAM_END drain/clean up
        want = [spram_mem[0x100 + i] for i in range(N)]
        print(f"T6 DMA read {N}B: {hex_list(got)}")
        if got != want:
            errors.append(f"T6 streaming SPRAM read: got {got}, want {want}")

        # T7: a normal register read still works after the streaming transaction
        # (FSM cleaned up and returned to HEADER0)
        result = await exi_read(ctx, _NWAYS, length=1)
        if result != [0x17]:
            errors.append(f"T7 NWAYS after DMA: got {result}")
        print(f"T7 NWAYS after DMA read: {first_hex(result)}")

    sim = Simulator(reg)
    sim.add_clock(Period(MHz=24), domain="exi")
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    sim.add_process(spram_model)

    sim.run()

    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
    else:
        print("\nAll tests passed.")
|
||||
@@ -0,0 +1,533 @@
|
||||
"""BBATop — top-level elaboratable for the GC BBA FPGA replacement.
|
||||
|
||||
Clock domains
|
||||
-------------
|
||||
capture : 54 MHz, from 12 MHz crystal via SB_PLL40_PAD (DIVR=0 DIVF=71 DIVQ=4)
|
||||
exi/sync : 24 MHz, from the iCE40UP5K internal SB_HFOSC (÷2, CLKHF_DIV=0b01)
|
||||
|
||||
Submodule instantiation and signal wiring
|
||||
-----------------------------------------
|
||||
See CLAUDE.md "Module Breakdown" and "CDC Signal Inventory" for the full list.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
|
||||
from exi_bba.exi_capture import ExiCapture
|
||||
from exi_bba.bba_register_file import BBARegisterFile
|
||||
from exi_bba.spram_arbiter import SPRAMArbiter
|
||||
from exi_bba.rx_frame_assembler import RXFrameAssembler
|
||||
from exi_bba.tx_frame_drain import TXFrameDrain
|
||||
from exi_bba.w5500_spi_master import W5500SPIMaster
|
||||
from exi_bba.w5100_parallel_master import W5100ParallelMaster
|
||||
from exi_bba.status_panel import StatusPanel
|
||||
|
||||
from amaranth.lib.cdc import FFSynchronizer
|
||||
|
||||
__all__ = ["BBATop"]
|
||||
|
||||
|
||||
class BBATop(Elaboratable):
    """Top-level module. Wires all submodules and defines clock domains.

    Parameters
    ----------
    eth : str
        Ethernet back-end, ``"w5100"`` (default, indirect parallel bus) or
        ``"w5500"`` (SPI). Any other value raises ``ValueError``.
    reset_cycles : int
        MR-reset settle wait handed to the ethernet master.
    status_panel : bool
        When true, instantiate StatusPanel and expose ``panel_led`` /
        ``panel_btn``.

    External ports (exposed for platform or testbench connection)
    -------------------------------------------------------------
    EXI / GC interface (SPI Mode 3)
        exi_clk / exi_mosi / exi_cs_n  : inputs from GC
        exi_miso                       : output to GC
        int_n                          : interrupt output (active low)

    W5500 SPI interface (SPI Mode 0) — only when eth == "w5500"
        w5500_clk / w5500_mosi / w5500_cs_n : outputs to W5500
        w5500_miso                          : input from W5500
        w5500_int_n                         : W5500 interrupt (input, active low)
        w5500_rst_n                         : W5500 hardware reset (output, active low)

    W5100 indirect parallel bus — only when eth == "w5100"
        w5100_addr                          : A[1:0] (output)
        w5100_data_o / w5100_data_oe        : D[7:0] output value and output enable
        w5100_data_i                        : D[7:0] input value
        w5100_cs_n / w5100_rd_n / w5100_wr_n: bus strobes (outputs, active low)
        w5100_int_n                         : W5100 interrupt (input, active low)
        w5100_rst_n                         : W5100 hardware reset (output, active low)

    Status panel — only when status_panel is true
        panel_led : 5-bit output to the onboard LEDs
        panel_btn : 3-bit input from the onboard button(s)
    """

    # Back-end names accepted by the `eth` constructor argument.
    _ETH_BACKENDS = ("w5100", "w5500")

    def __init__(self, eth="w5100", reset_cycles=24000, status_panel=False):
        # Reject unknown back-end names instead of silently falling through
        # to the W5100 branch (a typo such as "W5500" would otherwise build
        # the wrong hardware and expose the wrong pin set).
        if eth not in self._ETH_BACKENDS:
            raise ValueError(
                f"eth must be one of {self._ETH_BACKENDS}, got {eth!r}")

        # Ethernet back-end: "w5100" (indirect parallel bus, reaches the EXI
        # ceiling) or "w5500" (SPI, ~12 Mbit/s). Both expose the identical
        # tx/rx/init/par interface, so only the physical pins differ.
        self._eth = eth
        # MR-reset settle wait passed to the ethernet master (~1 ms on hardware;
        # the testbench overrides with a small value for fast simulation).
        self._reset_cycles = reset_cycles
        # Optional bring-up status panel (drives onboard LEDs/button on the
        # iCEbreaker — see synth.py). panel_led bit order matches StatusPanel.
        self._status_panel = status_panel

        # EXI (GC side)
        self.exi_clk  = Signal(init=1)
        self.exi_mosi = Signal()
        self.exi_cs_n = Signal(init=1)
        self.exi_miso = Signal()
        self.int_n    = Signal(init=1)

        if eth == "w5500":
            # W5500 SPI
            self.w5500_clk   = Signal()
            self.w5500_mosi  = Signal()
            self.w5500_miso  = Signal()
            self.w5500_cs_n  = Signal(init=1)
            self.w5500_int_n = Signal(init=1)
            self.w5500_rst_n = Signal(init=1)
        else:
            # W5100 indirect parallel bus. data_o/data_oe/data_i are the FPGA
            # side of a bidirectional D[7:0] (wrapped in a tristate SB_IO at the
            # platform level); a board ties the upper address lines to 0 so only
            # A[1:0] are wired.
            self.w5100_addr    = Signal(2)
            self.w5100_data_o  = Signal(8)
            self.w5100_data_oe = Signal()
            self.w5100_data_i  = Signal(8)
            self.w5100_cs_n    = Signal(init=1)
            self.w5100_rd_n    = Signal(init=1)
            self.w5100_wr_n    = Signal(init=1)
            self.w5100_int_n   = Signal(init=1)
            self.w5100_rst_n   = Signal(init=1)

        if status_panel:
            self.panel_led = Signal(5)  # to onboard LEDs (see StatusPanel)
            self.panel_btn = Signal(3)  # from onboard button(s)

    def elaborate(self, platform):
        """Instantiate and wire every submodule.

        With a real ``platform`` the clock domains are generated here
        (SB_PLL40_PAD for "capture", SB_HFOSC for "exi" and "sync"). With
        ``platform is None`` no clock hardware is instantiated and the
        simulation harness is expected to supply the clocks.
        """
        m = Module()

        # ── Clock domain generation ───────────────────────────────────────
        # Three domains, two physical sources (1 PLL + 1 internal HFOSC):
        #   capture @ 54 MHz (PLL)   — SPI bit engine only; oversamples the
        #                              27 MHz EXI clock 2× (robust Mode-3).
        #   exi     @ 24 MHz (HFOSC) — register file / transaction FSM.
        #   sync    @ 24 MHz (HFOSC) — SPRAM, RX/TX engines, ethernet master.
        # exi and sync share the HFOSC net (frequency- and phase-matched); the
        # AsyncFIFOs between them are still valid CDC and keep the module
        # boundaries clean. Only the tiny capture front-end needs the fast
        # clock — which is why 27 MHz-EXI / OG performance is reachable on the
        # iCE40UP5K even though the register-file logic tops out ~44 MHz.
        if platform is not None:
            # capture @ 54 MHz: icepll -i 12 -o 54 → DIVR=0 DIVF=71 DIVQ=4.
            # 54 MHz = 2× the 27 MHz EXI clock — the minimum oversampling that
            # cleanly implements SPI Mode 3. The isolated SPI bit engine closes
            # ~91 MHz on this device; the byte-FIFO read path brings the
            # integrated capture domain to ~62 MHz, so 54 closes with margin.
            m.domains += ClockDomain("capture")
            # NOTE(review): presumably cleared so the clock pad can feed
            # SB_PLL40_PAD directly rather than a global buffer — confirm.
            platform.lookup(platform.default_clk).attrs["GLOBAL"] = False
            m.submodules.pll = Instance(
                "SB_PLL40_PAD",
                p_FEEDBACK_PATH="SIMPLE",
                p_DIVR=0,
                p_DIVF=71,
                p_DIVQ=4,
                p_FILTER_RANGE=1,
                i_PACKAGEPIN=platform.request("clk12", dir="-").io,
                i_RESETB=Const(1, 1),
                i_BYPASS=Const(0, 1),
                o_PLLOUTGLOBAL=ClockSignal("capture"),
            )

            # exi & sync @ 24 MHz: one SB_HFOSC (÷2) drives both slow domains.
            # The bulky register-file / SPRAM / W5500 logic is routing-bound at
            # ~33–44 MHz on the UP5K; 24 MHz closes with large margin. The byte
            # rate (27 MHz EXI ÷ 8 ≈ 3.4 MHz) leaves ~7 slow cycles per byte.
            m.domains += ClockDomain("exi")
            m.domains += ClockDomain("sync")
            m.submodules.hfosc = Instance(
                "SB_HFOSC",
                p_CLKHF_DIV="0b01",  # 48 ÷ 2 → 24 MHz
                i_CLKHFEN=Const(1, 1),
                i_CLKHFPU=Const(1, 1),
                o_CLKHF=ClockSignal("exi"),
            )
            m.d.comb += ClockSignal("sync").eq(ClockSignal("exi"))
        # (simulation: test harness provides capture/exi/sync clocks via add_clock)

        # ── Submodules ────────────────────────────────────────────────────
        cap   = ExiCapture()  # SPI bit engine (capture) + byte FIFOs
        reg   = BBARegisterFile()
        arb   = SPRAMArbiter()
        asm   = RXFrameAssembler()
        drain = TXFrameDrain()
        eth   = (W5500SPIMaster(reset_cycles=self._reset_cycles)
                 if self._eth == "w5500"
                 else W5100ParallelMaster(reset_cycles=self._reset_cycles))

        m.submodules.cap   = cap
        m.submodules.reg   = reg
        m.submodules.arb   = arb
        m.submodules.asm   = asm
        m.submodules.drain = drain
        m.submodules.eth   = eth

        # ── External pin connections ──────────────────────────────────────
        m.d.comb += [
            # EXI inputs (to capture-domain front-end)
            cap.spi_clk.eq(self.exi_clk),
            cap.spi_mosi.eq(self.exi_mosi),
            cap.spi_cs_n.eq(self.exi_cs_n),
            # EXI outputs
            self.exi_miso.eq(cap.spi_miso),
            self.int_n.eq(reg.exi_int_n),
        ]

        # Ethernet back-end physical pins
        if self._eth == "w5500":
            m.d.comb += [
                self.w5500_clk.eq(eth.spi_clk),
                self.w5500_mosi.eq(eth.spi_mosi),
                self.w5500_cs_n.eq(eth.spi_cs_n),
                eth.spi_miso.eq(self.w5500_miso),
                eth.w5500_int_n.eq(self.w5500_int_n),
                self.w5500_rst_n.eq(eth.w5500_rst_n),
            ]
        else:
            m.d.comb += [
                self.w5100_addr.eq(eth.bus_addr),
                self.w5100_data_o.eq(eth.bus_data_o),
                self.w5100_data_oe.eq(eth.bus_data_oe),
                eth.bus_data_i.eq(self.w5100_data_i),
                self.w5100_cs_n.eq(eth.cs_n),
                self.w5100_rd_n.eq(eth.rd_n),
                self.w5100_wr_n.eq(eth.wr_n),
                eth.w5100_int_n.eq(self.w5100_int_n),
                self.w5100_rst_n.eq(eth.w5100_rst_n),
            ]

        # ── ExiCapture byte stream ↔ BBARegisterFile (exi domain) ────────
        m.d.comb += [
            reg.rx_data.eq(cap.rx_data),
            reg.rx_rdy.eq(cap.rx_rdy),
            cap.rx_en.eq(reg.rx_en),

            cap.tx_data.eq(reg.tx_data),
            cap.tx_en.eq(reg.tx_en),
            reg.tx_rdy.eq(cap.tx_rdy),

            reg.cs_active.eq(cap.cs_active),  # transaction-active (for DMA reads)
        ]

        # ── BBARegisterFile ↔ SPRAMArbiter (sync domain FIFO sides) ──────
        # SPRAM request: reg exi→sync FIFO read side → arb
        m.d.comb += [
            arb.exi_req_addr.eq(reg.spram_req_r_data),
            arb.exi_req_valid.eq(reg.spram_req_r_rdy),
            reg.spram_req_r_en.eq(arb.exi_req_ready),
        ]
        # SPRAM response: arb result → reg sync→exi FIFO write side
        m.d.comb += [
            reg.spram_rsp_w_data.eq(arb.exi_rsp_data),
            reg.spram_rsp_w_en.eq(arb.exi_rsp_valid),
            # arb does not need w_rdy feedback (spram_rsp FIFO is deeper than latency)
        ]

        # ── BBARegisterFile ↔ TXFrameDrain (sync domain FIFO sides) ──────
        m.d.comb += [
            drain.tx_bytes_r_data.eq(reg.tx_bytes_r_data),
            drain.tx_bytes_r_rdy.eq(reg.tx_bytes_r_rdy),
            reg.tx_bytes_r_en.eq(drain.tx_bytes_r_en),

            drain.tx_ctrl_r_data.eq(reg.tx_ctrl_r_data),
            drain.tx_ctrl_r_rdy.eq(reg.tx_ctrl_r_rdy),
            reg.tx_ctrl_r_en.eq(drain.tx_ctrl_r_en),
        ]

        # ── TXFrameDrain ↔ ethernet master (sync domain) ──────────────────
        m.d.comb += [
            eth.tx_data.eq(drain.tx_data),
            eth.tx_valid.eq(drain.tx_valid),
            drain.tx_ready.eq(eth.tx_ready),
            eth.tx_sof.eq(drain.tx_sof),
            eth.tx_eof.eq(drain.tx_eof),
        ]

        # ── ethernet master → RXFrameAssembler (sync domain) ─────────────
        m.d.comb += [
            asm.rx_data.eq(eth.rx_data),
            asm.rx_valid.eq(eth.rx_valid),
            eth.rx_ready.eq(asm.rx_ready),
            asm.rx_sof.eq(eth.rx_sof),
            asm.rx_eof.eq(eth.rx_eof),
        ]

        # ── RXFrameAssembler → SPRAMArbiter (ETH write, sync domain) ─────
        m.d.comb += [
            arb.eth_wr_addr.eq(asm.eth_wr_addr),
            arb.eth_wr_data.eq(asm.eth_wr_data),
            arb.eth_wr_valid.eq(asm.eth_wr_valid),
            asm.eth_wr_ready.eq(arb.eth_wr_ready),
        ]

        # ── RXFrameAssembler → BBARegisterFile (rx_wptr FIFO write side) ─
        m.d.comb += [
            reg.rx_wptr_w_data.eq(asm.rx_wptr_w_data),
            reg.rx_wptr_w_en.eq(asm.rx_wptr_w_en),
            asm.rx_wptr_w_rdy.eq(reg.rx_wptr_w_rdy),
        ]

        # ── Pulse synchronizer connections ────────────────────────────────
        m.d.comb += [
            # RX irq: sync → exi (RXFrameAssembler → reg → PS → exi domain)
            reg.rx_irq_i.eq(asm.rx_irq),
            # TX irq: sync → exi
            reg.tx_irq_i.eq(drain.tx_irq),
            # MAC address (PAR0–5) → SHAR. exi and sync share the HFOSC net,
            # and par is quasi-static (sampled by the master at init_req).
            eth.par.eq(reg.par),
        ]

        # ── RX enabled gate (NCRA SR / start-receive bit) ─────────────────
        # The RX ring-buffer path is active only after the GC sets NCRA[3].
        m.d.comb += asm.rx_enabled.eq(reg.ncra_sr)

        # ── Optional bring-up status panel (sync domain) ──────────────────
        # init_req = NCRA reset (exi→sync PS), OR'd with the panel's manual
        # re-init button when the panel is present.
        if self._status_panel:
            panel = StatusPanel()
            m.submodules.panel = panel

            # cs_active lives in the exi domain; bring it to sync for the LED.
            cs_a_sync = Signal()
            m.submodules.panel_cs = FFSynchronizer(
                cap.cs_active, cs_a_sync, o_domain="sync")

            # "ready" = ethernet init complete (latched until the next init).
            # init_done takes priority over a simultaneous re-init request.
            ready = Signal()
            with m.If(eth.init_done):
                m.d.sync += ready.eq(1)
            with m.Elif(reg.ncra_rst_o | panel.reinit):
                m.d.sync += ready.eq(0)

            m.d.comb += [
                panel.cs_active.eq(cs_a_sync),
                panel.rx_pulse.eq(asm.rx_irq),
                panel.tx_pulse.eq(drain.tx_irq),
                panel.ready.eq(ready),
                panel.btn.eq(self.panel_btn),
                self.panel_led.eq(panel.led),
                eth.init_req.eq(reg.ncra_rst_o | panel.reinit),
            ]
        else:
            m.d.comb += eth.init_req.eq(reg.ncra_rst_o)

        return m
|
||||
|
||||
|
||||
# ── Integration testbench ─────────────────────────────────────────────────
|
||||
# Drives real EXI Mode-3 transactions on the GC-facing pins and checks the
|
||||
# response — exercising the full chain ExiCapture (capture domain) ↔ byte FIFOs
|
||||
# ↔ BBARegisterFile (exi domain) ↔ sync modules, across all three clock domains.
|
||||
|
||||
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    dut = BBATop(eth="w5100", reset_cycles=20,   # small reset wait for sim
                 status_panel=True)              # also exercise the panel wiring
    errors = []

    HALF = 8   # capture ticks per SPI half-period (well-oversampled)

    def exi_header(addr, nbytes, write=False):
        """Build the 2-byte EXI header for an access of `nbytes` bytes at `addr`.

        Byte 0 carries the write flag in bit 7 and addr >> 6 in the low 7 bits;
        byte 1 carries addr[5:0] in bits [7:2] and (nbytes - 1) in bits [1:0].
        The length field is only 2 bits wide, so it is masked: a transfer
        longer than 4 bytes must not overflow into the addr[5:0] bits.
        """
        hdr0 = (0x80 if write else 0x00) | ((addr >> 6) & 0x7F)
        hdr1 = ((addr & 0x3F) << 2) | ((nbytes - 1) & 0x3)
        return hdr0, hdr1

    async def spi_byte(ctx, mosi_val):
        """Drive one EXI Mode-3 byte (MSB first); return the assembled MISO byte."""
        miso = 0
        for bit in range(7, -1, -1):
            ctx.set(dut.exi_mosi, (mosi_val >> bit) & 1)
            ctx.set(dut.exi_clk, 0)            # falling edge, MOSI updated with it
            await ctx.tick("capture").repeat(HALF)
            # MISO is sampled at the end of the low phase, just before the rise.
            miso = (miso << 1) | ctx.get(dut.exi_miso)
            ctx.set(dut.exi_clk, 1)            # rising edge
            await ctx.tick("capture").repeat(HALF)
        return miso

    async def exi_read(ctx, addr, length):
        """EXI immediate read: 2-byte header, clock-idle gap, then `length` bytes.

        Returns the list of `length` bytes seen on MISO during the data phase.
        For SPRAM (DMA-style) reads the header length field is ignored by the
        device anyway — the stream runs until CS deasserts.
        """
        hdr0, hdr1 = exi_header(addr, length)
        ctx.set(dut.exi_cs_n, 0)
        ctx.set(dut.exi_clk, 1)
        await ctx.tick("capture").repeat(HALF)
        await spi_byte(ctx, hdr0)
        await spi_byte(ctx, hdr1)
        # EXI_Imm clock-idle gap: the core decodes the header and prefetches
        # responses into the tx FIFO before the GC clocks the data phase.
        await ctx.tick("capture").repeat(HALF * 12)
        result = [await spi_byte(ctx, 0x00) for _ in range(length)]
        ctx.set(dut.exi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)
        return result

    async def exi_write(ctx, addr, data):
        """EXI immediate write: 2-byte header then the data bytes."""
        # Same header builder as exi_read, so the length field is masked here too.
        hdr0, hdr1 = exi_header(addr, len(data), write=True)
        ctx.set(dut.exi_cs_n, 0)
        ctx.set(dut.exi_clk, 1)
        await ctx.tick("capture").repeat(HALF)
        await spi_byte(ctx, hdr0)
        await spi_byte(ctx, hdr1)
        for b in data:
            await spi_byte(ctx, b)
        ctx.set(dut.exi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)

    # ── W5100 indirect-bus slave model (drives w5100_data_i) ─────────────
    # Pre-loads a known MACRAW packet in the RX buffer so we can verify the full
    # ethernet→SPRAM→GC path.  Same protocol as the W5100ParallelMaster bench.
    RX_FRAME = [0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03, 0x04]
    _W_RX_BASE = 0x6000
    _W_S0_CR = 0x0401
    _W_S0_RX_RSR = 0x0426
    _W_S0_RX_RD = 0x0428
    _W_CR_RECV = 0x40
    _A_MR, _A_AR0, _A_AR1, _A_DR = 0b00, 0b01, 0b10, 0b11

    def w5100_preload():
        """Return the model's initial memory: one MACRAW packet plus RSR/RD."""
        plen = len(RX_FRAME) + 2          # MACRAW length includes its header
        mem = {}
        for i, b in enumerate([(plen >> 8) & 0xFF, plen & 0xFF] + RX_FRAME):
            mem[_W_RX_BASE + i] = b
        mem[_W_S0_RX_RSR], mem[_W_S0_RX_RSR + 1] = (plen >> 8) & 0xFF, plen & 0xFF
        mem[_W_S0_RX_RD], mem[_W_S0_RX_RD + 1] = 0, 0
        return mem

    w5100_mem = w5100_preload()

    async def w5100_model(ctx):
        """Behavioural W5100 indirect-mode bus slave, sampled on every sync tick.

        Keeps the mode register (`mr`), the 16-bit indirect address register
        (`idm_ar`) and a sparse memory dict.  Reads are answered while /CS and
        /RD are both low; writes are latched on the rising edge of /WR.
        """
        idm_ar = 0
        mr = 0
        prev_rd = prev_wr = 1
        async for vals in ctx.tick("sync").sample(
                dut.w5100_cs_n, dut.w5100_rd_n, dut.w5100_wr_n,
                dut.w5100_addr, dut.w5100_data_o):
            # The sampled signal values are the last five items of the tuple.
            cs, rd, wr, a, do = vals[-5:]
            ai = (mr >> 1) & 1            # MR bit 1: address auto-increment
            if cs == 0 and rd == 0:                       # drive read data
                if a == _A_MR:
                    val = mr
                elif a == _A_AR0:
                    val = (idm_ar >> 8) & 0xFF
                elif a == _A_AR1:
                    val = idm_ar & 0xFF
                else:
                    val = w5100_mem.get(idm_ar, 0)
                ctx.set(dut.w5100_data_i, val)
            if cs == 0 and prev_wr == 0 and wr == 1:      # latch write on /WR rising
                if a == _A_MR:
                    mr = do
                elif a == _A_AR0:
                    idm_ar = (idm_ar & 0x00FF) | (do << 8)
                elif a == _A_AR1:
                    idm_ar = (idm_ar & 0xFF00) | do
                else:
                    w5100_mem[idm_ar] = do
                    # A RECV command on socket 0 consumes the pending packet.
                    if idm_ar == _W_S0_CR and do == _W_CR_RECV:
                        w5100_mem[_W_S0_RX_RSR] = 0
                        w5100_mem[_W_S0_RX_RSR + 1] = 0
                    if ai:
                        idm_ar = (idm_ar + 1) & 0xFFFF
            # Data-register reads auto-increment on the rising edge of /RD.
            if cs == 0 and prev_rd == 0 and rd == 1 and a == _A_DR and ai:
                idm_ar = (idm_ar + 1) & 0xFFFF
            prev_rd, prev_wr = rd, wr

    async def testbench(ctx):
        ctx.set(dut.exi_clk, 1)
        ctx.set(dut.exi_cs_n, 1)
        ctx.set(dut.panel_btn, 0b111)   # all buttons released (active-low idle)
        await ctx.tick("capture").repeat(20)

        # T1: device ID — read 4 bytes from addr 0 → 0x04 0x02 0x02 0x00
        dev = await exi_read(ctx, 0x0000, 4)
        print(f"T1 device ID: {[f'0x{b:02X}' for b in dev]}")
        if dev != [0x04, 0x02, 0x02, 0x00]:
            errors.append(f"T1 device ID: got {dev}")
        await ctx.tick("capture").repeat(HALF)

        # T2: write PAR0–3, read them back through the full chain
        await exi_write(ctx, 0x20, [0xDE, 0xAD, 0xBE, 0xEF])
        await ctx.tick("capture").repeat(HALF * 4)
        par = await exi_read(ctx, 0x20, 4)
        print(f"T2 PAR0-3 readback: {[f'0x{b:02X}' for b in par]}")
        if par != [0xDE, 0xAD, 0xBE, 0xEF]:
            errors.append(f"T2 PAR readback: got {par}")
        await ctx.tick("capture").repeat(HALF)

        # T3: NWAYS must read back the hardcoded 0x17 (link-up sentinel)
        nways = await exi_read(ctx, 0x31, 1)
        print(f"T3 NWAYS: 0x{nways[0]:02X} (want 0x17)")
        if nways != [0x17]:
            errors.append(f"T3 NWAYS: got {nways}")
        await ctx.tick("capture").repeat(HALF)

        # T4: DMA-style SPRAM read — clock 8 data bytes (past the 4-byte header
        # limit) within one CS.  Exercises the integrated streaming path:
        # ExiCapture(cs_active) → register file SPRAM_STREAM → SPRAMArbiter →
        # real SPRAM → MISO, plus the SPRAM_END cleanup.  SPRAM is uninitialised
        # here, so no specific data is checked.
        # NOTE: exi_read always returns exactly `length` items, so the length
        # check below cannot fail by itself; T5 is what validates that the
        # device recovered after the stream.
        dma = await exi_read(ctx, 0x0100, 8)
        print(f"T4 DMA read (8B from 0x100): {[f'0x{b:02X}' for b in dma]}")
        if len(dma) != 8:
            errors.append(f"T4 DMA read length: got {len(dma)}")
        await ctx.tick("capture").repeat(HALF)

        # T5: a register read after the streaming read confirms the FSM cleaned
        # up (SPRAM_END → HEADER0) and the device is responsive again.
        nways2 = await exi_read(ctx, 0x31, 1)
        print(f"T5 NWAYS after DMA: 0x{nways2[0]:02X} (want 0x17)")
        if nways2 != [0x17]:
            errors.append(f"T5 NWAYS after DMA read: got {nways2}")
        await ctx.tick("capture").repeat(HALF)

        # ── T6: FULL ETHERNET→SPRAM→GC LOOP ──────────────────────────────
        # A frame arrives from the network (W5100 model) → W5100 master reads it
        # → RXFrameAssembler writes it to the SPRAM ring → GC reads RWP then
        # DMA-reads the descriptor+frame back.  Exercises the entire RX path.
        # The W5100 needs its init sequence (which sets MR.AI / opens socket 0)
        # before multi-byte bus accesses work — trigger it via NCRA reset, as
        # the real GC driver does, and let it run before enabling RX.
        await exi_write(ctx, 0x00, [0x01])           # NCRA reset → init_req pulse
        await ctx.tick("capture").repeat(2000)       # let W5100 init run
        await exi_write(ctx, 0x00, [0x08])           # NCRA SR bit → enable RX
        await ctx.tick("capture").repeat(HALF * 2)
        ctx.set(dut.w5100_int_n, 0)                  # W5100: a packet was received
        await ctx.tick("capture").repeat(4000)       # let the W5100 RX + SPRAM write run
        ctx.set(dut.w5100_int_n, 1)
        await ctx.tick("capture").repeat(HALF * 2)

        rwp = await exi_read(ctx, 0x16, 1)           # RX write pointer (page)
        total_len = len(RX_FRAME) + 4
        got = await exi_read(ctx, 0x0100, total_len)  # descriptor + frame
        want = [0x00, 0x00, (total_len >> 8) & 0xFF, total_len & 0xFF] + RX_FRAME
        print(f"T6 RWP=0x{rwp[0]:02X} (want 0x02)")
        print(f"T6 SPRAM[0x100]: {[f'0x{b:02X}' for b in got]}")
        print(f"T6 expected : {[f'0x{b:02X}' for b in want]}")
        if rwp != [0x02]:
            errors.append(f"T6 RWP: got {rwp}, want [0x02]")
        if got != want:
            errors.append(f"T6 RX frame mismatch:\n got {got}\n want {want}")

        # T7: status-panel integration — after all the EXI traffic above, the
        # EXI-activity LED (panel led[1] = stretched cs_active) must be lit,
        # proving cap.cs_active → FFSync → StatusPanel → LED is wired end-to-end.
        leds = ctx.get(dut.panel_led)
        if not (leds >> 1) & 1:
            errors.append(f"T7 panel: EXI-activity LED not lit (led=0b{leds:05b})")
        print(f"T7 panel led=0b{leds:05b} (bit1=EXI activity, expect 1)")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=54), domain="capture")
    sim.add_clock(Period(MHz=24), domain="exi")
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    sim.add_process(w5100_model)
    sim.run()

    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
    else:
        print("\nAll BBATop integration tests passed.")
|
||||
@@ -0,0 +1,222 @@
|
||||
"""EEPROM model — exi domain.
|
||||
|
||||
Emulates the MX98730EC's 93C46 serial EEPROM.
|
||||
|
||||
93C46 protocol (Microwire, bit-bang)
|
||||
-------------------------------------
|
||||
CS=1 activates the device.
|
||||
Data clocked on rising SK edge, 9-bit header then data:
|
||||
Bit 0: start (always 1)
|
||||
Bit 1: opcode MSB } READ = 10
|
||||
Bit 2: opcode LSB }
|
||||
Bits 3–8: 6-bit address (MSB first)
|
||||
|
||||
After the 9th rising SK the DO line presents the MSB of the 16-bit word.
|
||||
Each subsequent rising SK advances one bit (MSB→LSB).
|
||||
|
||||
Shift register `shift_in` convention
|
||||
--------------------------------------
|
||||
`Cat(di_s, shift_in[:-1])` places di_s at bit 0 and shifts existing bits up.
|
||||
After N edges:
|
||||
shift_in[N-1] = first bit received (start)
|
||||
shift_in[0] = last bit received so far
|
||||
|
||||
At bit_ctr==8 (after 8 edges, receiving 9th on di_s):
|
||||
shift_in[7] = start (bit 0)
|
||||
shift_in[6] = opcode MSB (bit 1)
|
||||
shift_in[5] = opcode LSB (bit 2)
|
||||
shift_in[4:0] = addr[5:1] (bits 3–7, MSB first→LSB first in register)
|
||||
di_s = addr[0] (bit 8)
|
||||
|
||||
opcode = Cat(shift_in[5], shift_in[6]) → 0b10 = READ
|
||||
address = Cat(di_s, shift_in[0:5]) → addr[0..5]
|
||||
|
||||
EEPROM content (64 × 16-bit words)
|
||||
-------------------------------------
|
||||
Words 0–2 hold the source MAC address (Nintendo OUI 00:09:BF:AA:BB:CC).
|
||||
The GC BBA driver reads words 0–3 then copies to PAR0–5.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
from amaranth.lib.cdc import FFSynchronizer
|
||||
|
||||
__all__ = ["EEPROMModel"]
|
||||
|
||||
_EEPROM_WORDS = [
|
||||
0x0009, # word 0: PAR0=0x00, PAR1=0x09
|
||||
0xBFAA, # word 1: PAR2=0xBF, PAR3=0xAA
|
||||
0xBBCC, # word 2: PAR4=0xBB, PAR5=0xCC
|
||||
0x0000, # word 3: checksum placeholder
|
||||
]
|
||||
_EEPROM_WORDS += [0x0000] * (64 - len(_EEPROM_WORDS))
|
||||
|
||||
_OP_READ = 0b10 # opcode for READ
|
||||
|
||||
|
||||
class EEPROMModel(Elaboratable):
    """93C46 serial EEPROM model in the exi domain (read-only).

    Only the READ opcode produces output; any other opcode is received and
    then ignored.  One command is handled per CS assertion: once the 9-bit
    header has been taken (and, for READ, the 16 data bits shifted out),
    further SK edges are ignored until CS is deasserted.

    Leading 0 bits on DI before the start bit are skipped — the header only
    begins with the first DI=1 sampled on a rising SK edge while CS is high.

    Ports
    -----
    sk / cs / di : bit-bang inputs (raw async; synchronized internally)
    do           : serial data output (0 whenever no word is being read out)
    """

    def __init__(self):
        self.sk = Signal()
        self.cs = Signal()
        self.di = Signal()
        self.do = Signal()

    def elaborate(self, platform):
        m = Module()

        # One 16-bit signal per EEPROM word, initialised from _EEPROM_WORDS.
        words = Array([Signal(16, init=v, name=f"e{i}") for i, v in enumerate(_EEPROM_WORDS)])

        # ── Input synchronization (async → exi) ──────────────────────────
        # All three inputs go through identical synchronizers so DI stays
        # aligned with the SK edge it belongs to.
        sk_s = Signal()
        cs_s = Signal()
        di_s = Signal()
        m.submodules.sync_sk = FFSynchronizer(self.sk, sk_s, o_domain="exi")
        m.submodules.sync_cs = FFSynchronizer(self.cs, cs_s, o_domain="exi")
        m.submodules.sync_di = FFSynchronizer(self.di, di_s, o_domain="exi")

        # Rising-edge detector on the synchronized SK.
        sk_prev = Signal()
        m.d.exi += sk_prev.eq(sk_s)
        rising_sk = Signal()
        m.d.comb += rising_sk.eq(sk_s & ~sk_prev)

        # ── State ─────────────────────────────────────────────────────────
        shift_in = Signal(9)
        bit_ctr = Signal(4)     # 0..8 during header receive; 9 = header done

        shift_out = Signal(16)  # data word being shifted out MSB-first
        out_ctr = Signal(4)     # 0..15, counts bits shifted out
        in_read = Signal()      # 1 while outputting a word

        # DO is combinatorial: MSB of shift_out while in read-out phase
        m.d.comb += self.do.eq(Mux(in_read, shift_out[15], 0))

        # The header is open until its 9 bits have been taken.  bit_ctr then
        # parks at 9 instead of wrapping, so a longer command (e.g. a 25-bit
        # WRITE) cannot trigger a second, spurious decode.
        header_open = bit_ctr <= 8
        # Before the first accepted bit, only DI=1 (the start bit) is taken.
        start_seen = bit_ctr != 0

        with m.If(~cs_s):
            # CS low: abort whatever was in progress and re-arm.
            m.d.exi += bit_ctr.eq(0)
            m.d.exi += in_read.eq(0)
            m.d.exi += out_ctr.eq(0)

        with m.Elif(rising_sk):
            with m.If(in_read):
                # Shift out next bit (MSB first: left shift, zero into LSB)
                m.d.exi += shift_out.eq(Cat(0, shift_out[:-1]))
                with m.If(out_ctr == 15):
                    m.d.exi += in_read.eq(0)
                    m.d.exi += out_ctr.eq(0)
                with m.Else():
                    m.d.exi += out_ctr.eq(out_ctr + 1)

            with m.Elif(header_open & (start_seen | di_s)):
                # Shift di_s in at bit 0 (existing bits move up)
                m.d.exi += shift_in.eq(Cat(di_s, shift_in[:-1]))
                m.d.exi += bit_ctr.eq(bit_ctr + 1)

                with m.If(bit_ctr == 8):
                    # 9th bit (di_s = addr[0]) arrives.
                    # shift_in[7] = start, [6]=op_MSB, [5]=op_LSB, [4:0]=addr[5:1]
                    op = Cat(shift_in[5], shift_in[6])   # 0b10 for READ
                    adr = Cat(di_s, shift_in[0:5])       # addr[0..5]
                    with m.If(op == _OP_READ):
                        m.d.exi += shift_out.eq(words[adr])
                        m.d.exi += in_read.eq(1)
                        m.d.exi += out_ctr.eq(0)

        return m
|
||||
|
||||
|
||||
# ── Testbench ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    dut = EEPROMModel()
    errors = []

    HALF = 6   # exi-domain ticks per SK half-period (much longer than sync latency)

    async def sk_pulse(ctx):
        """One full SK cycle: high for HALF ticks, then low for HALF ticks."""
        ctx.set(dut.sk, 1)
        await ctx.tick("exi").repeat(HALF)
        ctx.set(dut.sk, 0)
        await ctx.tick("exi").repeat(HALF)

    async def eeprom_read(ctx, addr):
        """Issue a 93C46 READ for 6-bit `addr` and return the 16-bit word.

        DO is sampled in the SK-low phase, i.e. before each rising edge; the
        sixteen samples are assembled MSB-first.
        """
        ctx.set(dut.cs, 1)
        ctx.set(dut.sk, 0)
        await ctx.tick("exi").repeat(HALF)

        # 9-bit header: start(1) + opcode READ(10) + addr[5:0] MSB-first.
        header = [1, 1, 0] + [(addr >> shift) & 1 for shift in range(5, -1, -1)]
        for level in header:
            ctx.set(dut.di, level)
            await sk_pulse(ctx)      # DUT takes DI on the rising edge

        # After the 9th SK pulse DO already presents the word's MSB.
        word = 0
        for _ in range(16):
            word = (word << 1) | ctx.get(dut.do)   # sample before rising SK
            await sk_pulse(ctx)

        ctx.set(dut.cs, 0)
        await ctx.tick("exi").repeat(HALF)
        return word

    async def testbench(ctx):
        await ctx.tick("exi").repeat(4)
        ctx.set(dut.cs, 0)
        ctx.set(dut.sk, 0)
        ctx.set(dut.di, 0)
        await ctx.tick("exi").repeat(4)

        # (address, expected word) — T1..T4 in order; word 3 reads back 0x0000.
        cases = [(0, 0x0009), (1, 0xBFAA), (2, 0xBBCC), (3, 0x0000)]
        for number, (addr, want) in enumerate(cases, start=1):
            got = await eeprom_read(ctx, addr)
            print(f"T{number} word {addr} = 0x{got:04X} (expected 0x{want:04X})")
            if got != want:
                errors.append(f"T{number}: word {addr} = 0x{got:04X}, expected 0x{want:04X}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="exi")
    sim.add_testbench(testbench)

    with sim.write_vcd("EEPROMModel.vcd"):
        sim.run()

    if not errors:
        print("\nAll tests passed.")
    else:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
|
||||
@@ -0,0 +1,269 @@
|
||||
"""ExiCapture — fast EXI byte-capture front-end (capture domain, 54 MHz).
|
||||
|
||||
Wraps the SPIMode3Slave bit engine and bridges it to the slower `exi` domain
|
||||
(24 MHz) through two AsyncFIFOs:
|
||||
|
||||
capture (54 MHz) exi (24 MHz)
|
||||
┌────────────────────┐ rx_fifo ───► received bytes (header + data)
|
||||
│ SPIMode3Slave │ (8-bit, capture→exi)
|
||||
│ (bit engine) │ tx_fifo ◄─── response bytes to drive on MISO
|
||||
└────────────────────┘ (8-bit, exi→capture)
|
||||
|
||||
Why split: the bit engine must oversample a 27 MHz EXI clock 2×, which needs a
|
||||
54 MHz clock — far faster than the register-file logic can close (~44 MHz).
|
||||
Only this small, shallow front-end runs fast; everything else stays at 24 MHz.
|
||||
|
||||
TX response gating
|
||||
------------------
|
||||
Every EXI transaction begins with 2 header bytes (write_flag/addr/len) during
which the GC ignores MISO. The core cannot have produced a response yet (it
hasn't even decoded the header), so the wrapper must NOT pop tx_fifo for those
2 bytes. A per-transaction counter (`txld_cnt`, reset by frame_start) gates the
pop: the first two tx_load pulses of a transaction never pop tx_fifo; every
later tx_load pops one byte. The bit engine's `tx_byte` input is wired directly
to the tx_fifo read data (the FWFT head) — there is no holding register in this
wrapper — so the byte offered for MISO is whatever currently sits at the head.
On frame_start the wrapper also drains tx_fifo (`flushing`) until it is empty,
so response bytes that were prefetched but never clocked out by the previous
transaction cannot leak into the new one.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
from amaranth.lib.cdc import FFSynchronizer
|
||||
from amaranth.lib.fifo import AsyncFIFO
|
||||
|
||||
from exi_bba.spi_mode3_slave import SPIMode3Slave
|
||||
|
||||
__all__ = ["ExiCapture"]
|
||||
|
||||
|
||||
class ExiCapture(Elaboratable):
    """EXI front-end: SPI bit engine in the capture domain, byte FIFOs to the core.

    Physical SPI pins (capture domain)
    ----------------------------------
    spi_clk / spi_mosi / spi_cs_n : raw async inputs from the GC
    spi_miso                      : output to the GC

    RX byte stream towards the core (exi domain, read side of rx_fifo)
    ------------------------------------------------------------------
    rx_data : byte currently at the head of rx_fifo
    rx_rdy  : rx_fifo has a byte available
    rx_en   : pop strobe (one exi cycle consumes rx_data)

    TX byte stream from the core (exi domain, write side of tx_fifo)
    ----------------------------------------------------------------
    tx_data : response byte to enqueue
    tx_en   : write strobe
    tx_rdy  : tx_fifo has room

    Status
    ------
    cs_active : bit engine's cs_active, synchronized into the exi domain
    """

    def __init__(self, rx_depth=4, tx_depth=2):
        # FIFO depths, applied when the FIFOs are built in elaborate().
        self._rx_depth = rx_depth
        self._tx_depth = tx_depth

        # Physical SPI (capture domain, wired to pins by BBATop).
        # Clock and chip-select start at 1.
        self.spi_clk = Signal(init=1)
        self.spi_mosi = Signal()
        self.spi_cs_n = Signal(init=1)
        self.spi_miso = Signal()

        # Core-facing RX read side
        self.rx_data = Signal(8)
        self.rx_rdy = Signal()
        self.rx_en = Signal()

        # Core-facing TX write side
        self.tx_data = Signal(8)
        self.tx_en = Signal()
        self.tx_rdy = Signal()

        # Core-facing: high (exi domain) while a transaction is in progress.
        # The register file uses it to stream variable-length (DMA) reads
        # until CS deasserts.
        self.cs_active = Signal()

    def elaborate(self, platform):
        m = Module()

        # ── Submodules ───────────────────────────────────────────────────
        m.submodules.spi = spi = SPIMode3Slave(domain="capture")
        m.submodules.rx_fifo = rx_fifo = AsyncFIFO(
            width=8, depth=self._rx_depth, w_domain="capture", r_domain="exi")
        m.submodules.tx_fifo = tx_fifo = AsyncFIFO(
            width=8, depth=self._tx_depth, w_domain="exi", r_domain="capture")
        # cs_active (capture) → exi domain for the register file
        m.submodules.cs_sync = FFSynchronizer(
            spi.cs_active, self.cs_active, o_domain="exi")

        m.d.comb += [
            # Physical pins ↔ bit engine
            spi.spi_clk.eq(self.spi_clk),
            spi.spi_mosi.eq(self.spi_mosi),
            spi.spi_cs_n.eq(self.spi_cs_n),
            self.spi_miso.eq(spi.spi_miso),

            # RX: every byte the bit engine reports goes into rx_fifo …
            rx_fifo.w_data.eq(spi.rx_byte),
            rx_fifo.w_en.eq(spi.rx_valid),
            # … and the core reads it from the exi side.
            self.rx_data.eq(rx_fifo.r_data),
            self.rx_rdy.eq(rx_fifo.r_rdy),
            rx_fifo.r_en.eq(self.rx_en),

            # TX: the core writes responses from the exi side …
            tx_fifo.w_data.eq(self.tx_data),
            tx_fifo.w_en.eq(self.tx_en),
            self.tx_rdy.eq(tx_fifo.w_rdy),
            # … and the bit engine is fed live from the tx_fifo head.
            spi.tx_byte.eq(tx_fifo.r_data),
        ]

        # ── TX response gating (capture domain) ──────────────────────────
        # txld_cnt counts tx_load pulses since frame_start and saturates at 3.
        # While it is below 2 (the two header bytes of the transaction) a
        # tx_load does not pop tx_fifo; from then on each tx_load pops one byte
        # so the head advances to the next response.
        txld_cnt = Signal(2)

        # flushing is raised on frame_start and held until tx_fifo reports
        # empty.  While set, tx_fifo is popped whenever it has data, which
        # discards response bytes the core prefetched for the previous
        # transaction but the GC never clocked out.
        flushing = Signal()

        # The pop uses only tx_load/txld_cnt and the flush term; frame_start is
        # deliberately kept off this path (capture-domain timing).
        data_pop = spi.tx_load & (txld_cnt >= 2)
        flush_pop = flushing & tx_fifo.r_rdy
        m.d.comb += tx_fifo.r_en.eq(data_pop | flush_pop)

        with m.If(spi.frame_start):
            # New transaction: start draining and restart the byte counter.
            m.d.capture += [
                flushing.eq(1),
                txld_cnt.eq(0),
            ]
        with m.Else():
            with m.If(~tx_fifo.r_rdy):
                m.d.capture += flushing.eq(0)
            with m.If(spi.tx_load & (txld_cnt < 3)):
                m.d.capture += txld_cnt.eq(txld_cnt + 1)

        return m
|
||||
|
||||
|
||||
# ── Testbench ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    dut = ExiCapture()
    errors = []

    # SPI half-period in capture ticks.  At 54 MHz capture / 27 MHz EXI the real
    # ratio is ~2; use 4 here for a clean, well-oversampled functional check.
    HALF = 4

    async def spi_byte(ctx, mosi_val):
        """Clock one SPI Mode 3 byte, MSB first; return the byte read on MISO."""
        received = 0
        for shift in reversed(range(8)):
            ctx.set(dut.spi_mosi, (mosi_val >> shift) & 1)
            ctx.set(dut.spi_clk, 0)
            await ctx.tick("capture").repeat(HALF)
            # MISO is sampled at the end of the clock-low phase.
            received = (received << 1) | ctx.get(dut.spi_miso)
            ctx.set(dut.spi_clk, 1)
            await ctx.tick("capture").repeat(HALF)
        return received

    async def core_drain_rx(ctx, into):
        """Pop one byte from the core RX side if available; True if one was taken."""
        if not ctx.get(dut.rx_rdy):
            return False
        into.append(ctx.get(dut.rx_data))
        ctx.set(dut.rx_en, 1)
        await ctx.tick("exi").repeat(1)
        ctx.set(dut.rx_en, 0)
        return True

    async def push_tx(ctx, b):
        """Write one response byte into tx_fifo (one-cycle tx_en strobe)."""
        ctx.set(dut.tx_data, b)
        ctx.set(dut.tx_en, 1)
        await ctx.tick("exi").repeat(1)
        ctx.set(dut.tx_en, 0)

    async def drain_window(ctx, into):
        """Poll the RX side 20 times, one exi tick after each poll."""
        for _ in range(20):
            await core_drain_rx(ctx, into)
            await ctx.tick("exi").repeat(1)

    async def do_txn(ctx, hdr, responses, n_data, rx_seen):
        """Run one EXI transaction and return the MISO bytes of its data phase.

        Sequence: assert CS, clock the `hdr` bytes, model the clock-idle gap
        (drain rx, then prefetch `responses` into tx_fifo), clock `n_data`
        data bytes, release CS and drain the data-phase dummy bytes.
        """
        ctx.set(dut.spi_cs_n, 0)
        ctx.set(dut.spi_clk, 1)
        await ctx.tick("capture").repeat(HALF)

        for header_byte in hdr:
            await spi_byte(ctx, header_byte)

        await drain_window(ctx, rx_seen)             # clock-idle gap
        for response in responses:
            await push_tx(ctx, response)
        await ctx.tick("capture").repeat(2)

        miso = []
        for _ in range(n_data):
            miso.append(await spi_byte(ctx, 0x00))

        ctx.set(dut.spi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)
        await drain_window(ctx, rx_seen)             # drain data-phase dummies
        return miso

    async def testbench(ctx):
        rx_seen = []
        await ctx.tick("capture").repeat(2)

        # ── T1: header + 2 data bytes read back ──────────────────────────
        miso = await do_txn(ctx, [0x12, 0x34], [0xA5, 0x5A], 2, rx_seen)
        header_rx = rx_seen[:2]
        print(f"T1 rx={[hex(b) for b in header_rx]} MISO={[f'0x{b:02X}' for b in miso]}")
        if header_rx != [0x12, 0x34]:
            errors.append(f"T1 header rx wrong: {header_rx}")
        if miso != [0xA5, 0x5A]:
            errors.append(f"T1 MISO wrong: {[hex(b) for b in miso]}")

        # ── T2: prefetch over-push must NOT leak into the next transaction ─
        # Txn A queues 2 responses but only 1 data byte is clocked, so a stale
        # byte stays in tx_fifo.  Txn B must still read back its OWN responses,
        # which shows the CS-fall flush removed the leftover.
        rx_seen.clear()
        await do_txn(ctx, [0x12, 0x34], [0xA5, 0x5A], 1, rx_seen)   # leaves 0x5A
        misoB = await do_txn(ctx, [0x12, 0x34], [0x11, 0x22], 2, rx_seen)
        print(f"T2 MISO after over-push: {[f'0x{b:02X}' for b in misoB]} (want 0x11 0x22)")
        if misoB != [0x11, 0x22]:
            errors.append(f"T2 flush failed — stale byte leaked: {[hex(b) for b in misoB]}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=54), domain="capture")
    sim.add_clock(Period(MHz=24), domain="exi")
    sim.add_testbench(testbench)

    with sim.write_vcd("ExiCapture.vcd"):
        sim.run()

    if not errors:
        print("\nAll tests passed.")
    else:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
|
||||
@@ -0,0 +1,312 @@
|
||||
"""RX frame assembler — sync domain (24 MHz).
|
||||
|
||||
Receives raw ethernet frames from W5500SPIMaster and writes them into the SPRAM
|
||||
ring buffer in MX98730EC format.
|
||||
|
||||
Ring buffer layout (SPRAM byte addresses)
|
||||
------------------------------------------
|
||||
0x0100–0x0FFF 15 pages × 256 bytes = 3840 bytes
|
||||
Pages 0x01–0x0F; page 0x00 is reserved.
|
||||
Page wrap: after 0x0F → 0x01 (skip 0x00).
|
||||
|
||||
Frame descriptor (4 bytes at page start)
|
||||
-----------------------------------------------
|
||||
Byte 0: LRPS (last received packet status) — 0x00
|
||||
Byte 1: 0x00
|
||||
Byte 2: total_length[15:8] (big-endian; includes 4 descriptor bytes)
|
||||
Byte 3: total_length[7:0]
|
||||
Bytes 4+: raw ethernet frame
|
||||
|
||||
Write sequence
|
||||
--------------
|
||||
1. Issue 4 SPRAM writes of 0x00 (placeholder descriptor).
|
||||
2. For each byte received from W5500, issue one SPRAM write.
|
||||
3. After EOF: rewrite descriptor bytes 2 and 3 with actual length.
|
||||
4. Advance RWP, push to rx_wptr FIFO, pulse rx_irq.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
|
||||
__all__ = ["RXFrameAssembler"]
|
||||
|
||||
_RX_PAGE_FIRST = 0x01
|
||||
_RX_PAGE_LAST = 0x0F
|
||||
_PAGES_TOTAL = _RX_PAGE_LAST - _RX_PAGE_FIRST + 1 # 15
|
||||
|
||||
|
||||
class RXFrameAssembler(Elaboratable):
    """Writes incoming ethernet frames into the SPRAM ring buffer.

    Each frame is stored starting at byte address ``rwp * 256``:

        bytes 0..1 : 0x00 (descriptor placeholder, left as zero)
        byte  2    : total_len[11:8] (zero-extended to 8 bits)
        byte  3    : total_len[7:0]
        bytes 4..  : frame data

    where ``total_len`` = number of frame data bytes + 4.

    W5500 streaming interface (sync domain)
    ----------------------------------------
    rx_data / rx_valid / rx_ready : byte stream
    rx_sof / rx_eof               : frame delimiters (same cycle as rx_valid)

    rx_ready is only asserted in RECV_AND_WRITE, so the byte presented together
    with rx_sof is not consumed until the descriptor placeholder has been
    written; the source has to hold it until rx_ready is high.

    SPRAM write interface (to SPRAMArbiter, sync domain)
    -----------------------------------------------------
    eth_wr_addr / eth_wr_data / eth_wr_valid / eth_wr_ready

    A write beat is transferred only in a cycle where eth_wr_valid AND
    eth_wr_ready are both high.  eth_wr_ready on its own (while eth_wr_valid is
    low) is never treated as an acceptance, so the module works both with a
    sink that asserts ready only in response to valid and with a sink that
    keeps ready permanently high.

    CDC outputs (wired by BBATop)
    -----------------------------
    rx_wptr_w_data / rx_wptr_w_en / rx_wptr_w_rdy
    rx_irq     : 1-cycle pulse → PulseSynchronizer input
    rx_enabled : controlled by NCRA SR bit (from BBARegisterFile)
    """

    def __init__(self):
        # W5500 stream in
        self.rx_data = Signal(8)
        self.rx_valid = Signal()
        self.rx_ready = Signal()
        self.rx_sof = Signal()
        self.rx_eof = Signal()

        # SPRAM write out
        self.eth_wr_addr = Signal(16)
        self.eth_wr_data = Signal(8)
        self.eth_wr_valid = Signal()
        self.eth_wr_ready = Signal()

        # RWP FIFO write-side (sync→exi)
        self.rx_wptr_w_data = Signal(8)
        self.rx_wptr_w_en = Signal()
        self.rx_wptr_w_rdy = Signal()

        # rx_irq pulse (→ PulseSynchronizer)
        self.rx_irq = Signal()

        # RX gate from NCRA SR bit
        self.rx_enabled = Signal()

    def elaborate(self, platform):
        m = Module()

        # ── Ring-buffer state ─────────────────────────────────────────────
        rwp = Signal(8, init=_RX_PAGE_FIRST)  # current RX write page (1–15)

        # Address of the next byte to be written within the current frame
        wr_addr = Signal(16)
        # Number of frame data bytes received
        data_ctr = Signal(12)
        # Total length = data_ctr + 4 (descriptor included)
        total_len = Signal(12)

        # Descriptor base (rwp*256) — saved when frame starts
        desc_base = Signal(16)
        frame_base = Signal(16)

        # Placeholder descriptor beat counter (0..3)
        desc_ctr = Signal(2)

        # Number of pages consumed by this frame (rounded up)
        pages_used = Signal(5)

        # A write beat completes only when valid and ready coincide.
        wr_fire = Signal()

        # Default: no pulses (later assignments in the FSM override these)
        m.d.sync += self.rx_irq.eq(0)
        m.d.sync += self.rx_wptr_w_en.eq(0)

        # Combinatorial helpers
        m.d.comb += total_len.eq(data_ctr + 4)
        m.d.comb += frame_base.eq(Cat(Const(0, 8), rwp))
        m.d.comb += wr_fire.eq(self.eth_wr_valid & self.eth_wr_ready)

        with m.FSM(domain="sync", name="rx_fsm"):

            with m.State("IDLE"):
                m.d.comb += self.rx_ready.eq(0)
                m.d.sync += self.eth_wr_valid.eq(0)
                with m.If(self.rx_valid & self.rx_sof & self.rx_enabled):
                    m.d.sync += desc_base.eq(frame_base)
                    m.d.sync += wr_addr.eq(frame_base)
                    m.d.sync += data_ctr.eq(0)
                    m.d.sync += desc_ctr.eq(0)
                    # Present the first placeholder beat right away so that
                    # eth_wr_valid is already high on entry to
                    # WRITE_PLACEHOLDER (overrides the valid←0 default above).
                    m.d.sync += self.eth_wr_addr.eq(frame_base)
                    m.d.sync += self.eth_wr_data.eq(0x00)
                    m.d.sync += self.eth_wr_valid.eq(1)
                    m.next = "WRITE_PLACEHOLDER"

            with m.State("WRITE_PLACEHOLDER"):
                # Write 4 bytes of 0x00 as placeholder descriptor.
                # eth_wr_valid stays high and eth_wr_data stays 0x00 for the
                # whole state; only the address advances, one step per
                # accepted beat, so addresses base+0 .. base+3 are each
                # written exactly once.
                with m.If(wr_fire):
                    m.d.sync += wr_addr.eq(wr_addr + 1)
                    with m.If(desc_ctr == 3):
                        m.d.sync += self.eth_wr_valid.eq(0)
                        m.next = "RECV_AND_WRITE"
                    with m.Else():
                        m.d.sync += desc_ctr.eq(desc_ctr + 1)
                        m.d.sync += self.eth_wr_addr.eq(wr_addr + 1)

            with m.State("RECV_AND_WRITE"):
                # Accept bytes from W5500 and write each to SPRAM immediately.
                # A new byte can be taken whenever the output register is
                # empty or is being drained in this very cycle.
                m.d.comb += self.rx_ready.eq(~self.eth_wr_valid | self.eth_wr_ready)
                with m.If(self.rx_valid & (~self.eth_wr_valid | self.eth_wr_ready)):
                    m.d.sync += self.eth_wr_addr.eq(wr_addr)
                    m.d.sync += self.eth_wr_data.eq(self.rx_data)
                    m.d.sync += self.eth_wr_valid.eq(1)
                    m.d.sync += wr_addr.eq(wr_addr + 1)
                    m.d.sync += data_ctr.eq(data_ctr + 1)
                    with m.If(self.rx_eof):
                        m.next = "WAIT_LAST_WRITE"
                with m.Elif(wr_fire):
                    # Pending write accepted and no new byte to replace it
                    m.d.sync += self.eth_wr_valid.eq(0)

            with m.State("WAIT_LAST_WRITE"):
                # Wait for the last data byte write to be accepted
                with m.If(~self.eth_wr_valid | self.eth_wr_ready):
                    m.d.sync += self.eth_wr_valid.eq(0)
                    # Compute pages used: ceil((data_ctr + 4) / 256)
                    # = total_len[11:8] + (total_len[7:0] != 0)
                    m.d.sync += pages_used.eq(total_len[8:12] + (total_len[:8] != 0))
                    m.next = "WRITE_LEN_HI"

            with m.State("WRITE_LEN_HI"):
                # Overwrite descriptor byte 2 with total_len[11:8].
                # eth_wr_valid is low on entry, so the first cycle only
                # presents the write; the state is left once it is accepted.
                m.d.sync += self.eth_wr_addr.eq(desc_base + 2)
                m.d.sync += self.eth_wr_data.eq(total_len[8:12])
                m.d.sync += self.eth_wr_valid.eq(1)
                with m.If(wr_fire):
                    m.d.sync += self.eth_wr_valid.eq(0)
                    m.next = "WRITE_LEN_LO"

            with m.State("WRITE_LEN_LO"):
                # Overwrite descriptor byte 3 with total_len[7:0]
                # (same present-then-accept sequence as WRITE_LEN_HI).
                m.d.sync += self.eth_wr_addr.eq(desc_base + 3)
                m.d.sync += self.eth_wr_data.eq(total_len[:8])
                m.d.sync += self.eth_wr_valid.eq(1)
                with m.If(wr_fire):
                    m.d.sync += self.eth_wr_valid.eq(0)
                    m.next = "ADVANCE_RWP"

            with m.State("ADVANCE_RWP"):
                # next_rwp = ((rwp - 1 + pages_used) % 15) + 1
                next_rwp_raw = Signal(8)
                m.d.comb += next_rwp_raw.eq(rwp + pages_used)
                with m.If(next_rwp_raw > _RX_PAGE_LAST):
                    m.d.sync += rwp.eq(next_rwp_raw - _PAGES_TOTAL)
                with m.Else():
                    m.d.sync += rwp.eq(next_rwp_raw)
                m.next = "PUSH_WPT"

            with m.State("PUSH_WPT"):
                # Publish the advanced write pointer and raise the interrupt
                # once the pointer FIFO can take an entry.
                with m.If(self.rx_wptr_w_rdy):
                    m.d.sync += self.rx_wptr_w_data.eq(rwp)
                    m.d.sync += self.rx_wptr_w_en.eq(1)
                    m.d.sync += self.rx_irq.eq(1)
                    m.next = "IDLE"

        return m
|
||||
|
||||
|
||||
# ── Testbench ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Self-test: simulates RXFrameAssembler with an always-ready SPRAM sink
    # and an always-ready write-pointer FIFO, then checks the published write
    # pointer after two short frames and that nothing is written while RX is
    # disabled.  Exits with status 1 if any check fails.
    import sys
    from amaranth.sim import Simulator, Period

    dut = RXFrameAssembler()
    errors = []

    # Track all SPRAM writes issued by the DUT
    # NOTE(review): nothing in this block appends to spram_writes, so the
    # SPRAM contents (descriptor and payload bytes) are not actually verified.
    spram_writes = []

    async def testbench(ctx):
        # Setup: acknowledge all SPRAM writes immediately
        ctx.set(dut.eth_wr_ready, 1)
        ctx.set(dut.rx_wptr_w_rdy, 1)
        ctx.set(dut.rx_enabled, 1)
        await ctx.tick("sync").repeat(2)

        # ── T1: 10-byte frame → pages_used=1, rwp advances 1→2 ──────────────
        # Send SOF + first byte
        # NOTE(review): the driver below advances one byte per tick and never
        # looks at dut.rx_ready, so bytes presented while the DUT is not ready
        # are dropped; the check only relies on rx_eof being seen while the
        # DUT is accepting data.
        frame = [0xAA, 0xBB, 0xCC, 0xDD, 0x08, 0x00, 0x45, 0x00, 0x00, 0x01]

        ctx.set(dut.rx_data, frame[0])
        ctx.set(dut.rx_valid, 1)
        ctx.set(dut.rx_sof, 1)
        await ctx.tick("sync").repeat(1)
        ctx.set(dut.rx_sof, 0)

        for i, b in enumerate(frame[1:], start=1):
            ctx.set(dut.rx_data, b)
            ctx.set(dut.rx_eof, 1 if i == len(frame) - 1 else 0)
            await ctx.tick("sync").repeat(1)

        ctx.set(dut.rx_valid, 0)
        ctx.set(dut.rx_eof, 0)

        # Poll for up to 30 ticks until rx_irq pulses (1-cycle pulse)
        t1_irq_seen = False
        t1_wptr_d = 0
        for _ in range(30):
            await ctx.tick("sync").repeat(1)
            if ctx.get(dut.rx_irq):
                t1_irq_seen = True
                t1_wptr_d = ctx.get(dut.rx_wptr_w_data)
                break

        print(f"T1 rx_irq_seen={t1_irq_seen} wptr_data=0x{t1_wptr_d:02X}")
        if not t1_irq_seen:
            errors.append("T1: rx_irq never pulsed")
        if t1_wptr_d != 2:
            errors.append(f"T1: rwp should be 2 (page 1→2), got {t1_wptr_d}")

        await ctx.tick("sync").repeat(4)

        # ── T2: Send a second frame; verify rwp advances further ────────────
        frame2 = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66]
        ctx.set(dut.rx_data, frame2[0])
        ctx.set(dut.rx_valid, 1)
        ctx.set(dut.rx_sof, 1)
        await ctx.tick("sync").repeat(1)
        ctx.set(dut.rx_sof, 0)

        for i, b in enumerate(frame2[1:], start=1):
            ctx.set(dut.rx_data, b)
            ctx.set(dut.rx_eof, 1 if i == len(frame2) - 1 else 0)
            await ctx.tick("sync").repeat(1)

        ctx.set(dut.rx_valid, 0)
        ctx.set(dut.rx_eof, 0)

        # Same polling scheme as T1: wait for the 1-cycle rx_irq pulse.
        t2_irq_seen = False
        t2_wptr_d = 0
        for _ in range(30):
            await ctx.tick("sync").repeat(1)
            if ctx.get(dut.rx_irq):
                t2_irq_seen = True
                t2_wptr_d = ctx.get(dut.rx_wptr_w_data)
                break

        print(f"T2 rx_irq_seen={t2_irq_seen} wptr_data=0x{t2_wptr_d:02X}")
        if not t2_irq_seen:
            errors.append("T2: rx_irq never pulsed after second frame")
        if t2_wptr_d != 3:
            errors.append(f"T2: rwp should be 3 (page 2→3), got {t2_wptr_d}")

        # ── T3: RX disabled — SOF must be ignored ──────────────────────────
        ctx.set(dut.rx_enabled, 0)
        ctx.set(dut.rx_data, 0xDE)
        ctx.set(dut.rx_valid, 1)
        ctx.set(dut.rx_sof, 1)
        await ctx.tick("sync").repeat(4)
        ctx.set(dut.rx_valid, 0)
        ctx.set(dut.rx_sof, 0)

        # No SPRAM write should have been issued
        # (only the final level of eth_wr_valid is sampled, after the 4 ticks)
        wr_valid = ctx.get(dut.eth_wr_valid)
        if wr_valid:
            errors.append("T3: SPRAM write issued while rx_enabled=0")
        print(f"T3 rx disabled: eth_wr_valid={wr_valid} (expected 0)")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)

    with sim.write_vcd("RXFrameAssembler.vcd"):
        sim.run()

    # Report collected failures and signal them through the exit status.
    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
    else:
        print("\nAll tests passed.")
|
||||
@@ -0,0 +1,274 @@
|
||||
"""SPI Mode 3 byte-oriented slave for the EXI bus.
|
||||
|
||||
CPOL=1, CPHA=1: CLK idles HIGH.
|
||||
Slave samples MOSI on the FALLING CLK edge.
|
||||
Slave drives MISO on the RISING CLK edge (master samples on next falling edge).
|
||||
All three raw inputs are run through a 2-stage FFSynchronizer before use.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
from amaranth.lib.cdc import FFSynchronizer
|
||||
|
||||
|
||||
# ── public re-export for import convenience ─────────────────────────────────
|
||||
__all__ = ["SPIMode3Slave"]
|
||||
|
||||
|
||||
class SPIMode3Slave(Elaboratable):
    """Byte-oriented SPI Mode 3 slave.

    All registered logic runs in the clock domain named by ``domain``
    (default ``"capture"``).

    Ports
    -----
    spi_clk / spi_mosi / spi_cs_n : raw async inputs from GC (synchronized internally)
    spi_miso    : output to GC; idles HIGH when CS deasserted
    rx_byte     : last complete received byte (valid when rx_valid pulses)
    rx_valid    : 1-cycle pulse in ``domain`` when rx_byte contains a new byte
    tx_byte     : byte to transmit; its MSB is driven live onto MISO until the
                  first falling clock edge of a byte, at which point the whole
                  byte is latched into the shift register
    tx_load     : 1-cycle pulse requesting the next TX byte from upstream
    frame_start : 1-cycle pulse when CS assertion is detected
    cs_active   : level, high while the synchronized CS is asserted
    """

    def __init__(self, domain="capture"):
        # Clock domain this byte engine runs in.  Split-domain design puts the
        # bit engine in a fast `capture` domain (54 MHz) so it can oversample
        # a 27 MHz EXI clock ~3×; the register file lives in a slower domain.
        self._domain = domain

        self.spi_clk = Signal(init=1)    # idles HIGH
        self.spi_mosi = Signal()
        self.spi_cs_n = Signal(init=1)   # active LOW

        self.spi_miso = Signal()         # combinatorial output

        self.rx_byte = Signal(8)
        self.rx_valid = Signal()
        self.tx_byte = Signal(8)
        self.tx_load = Signal()

        # 1-cycle pulse on CS assertion (transaction start).  The capture
        # wrapper uses it to reset its per-transaction TX byte counter.
        self.frame_start = Signal()

        # Level: high while CS is asserted (a transaction is in progress).
        # Lets downstream logic detect variable-length (DMA) transaction ends.
        self.cs_active = Signal()

    def elaborate(self, platform):
        m = Module()
        d = self._domain

        # ── Input synchronization (async → `d`, 2 stages) ──────────────────
        clk_s = Signal(init=1)
        mosi_s = Signal()
        cs_s = Signal(init=1)

        m.submodules.sync_clk = FFSynchronizer(self.spi_clk, clk_s, o_domain=d, init=1)
        m.submodules.sync_mosi = FFSynchronizer(self.spi_mosi, mosi_s, o_domain=d)
        m.submodules.sync_cs = FFSynchronizer(self.spi_cs_n, cs_s, o_domain=d, init=1)

        # ── Edge detection ──────────────────────────────────────────────────
        clk_prev = Signal(init=1)
        cs_prev = Signal(init=1)
        m.d[d] += clk_prev.eq(clk_s)
        m.d[d] += cs_prev.eq(cs_s)

        falling_clk = Signal()
        rising_clk = Signal()
        cs_fall = Signal()
        m.d.comb += falling_clk.eq(~clk_s & clk_prev)
        m.d.comb += rising_clk.eq(clk_s & ~clk_prev)
        m.d.comb += cs_fall.eq(~cs_s & cs_prev)
        m.d.comb += self.frame_start.eq(cs_fall)
        m.d.comb += self.cs_active.eq(~cs_s)

        # ── Shift registers ─────────────────────────────────────────────────
        rx_shift = Signal(8)
        tx_shift = Signal(8)
        bit_ctr = Signal(4)         # counts 0..7; 7 means "8th (last) bit"
        armed = Signal(init=1)      # between bytes: drive the LIVE tx_byte MSB
        rearm = Signal()            # arm for next byte on the next rising edge

        # MISO: idle HIGH when CS deasserted.  While "armed" — i.e. at the start
        # of a byte, including the inter-byte / clock-idle gap before the first
        # falling edge — drive the LIVE tx_byte MSB.  This is what lets a
        # response that upstream pushes DURING the EXI clock-idle gap reach MISO
        # in time: there is no clock edge during the gap to latch it, so MISO
        # must be combinational on tx_byte until the byte actually starts.  Once
        # shifting (after the first falling edge) drive the latched shift reg.
        m.d.comb += self.spi_miso.eq(
            Mux(cs_s, 1, Mux(armed, self.tx_byte[7], tx_shift[7]))
        )

        # Default: deassert single-cycle pulses every cycle
        m.d[d] += self.rx_valid.eq(0)
        m.d[d] += self.tx_load.eq(0)

        with m.If(cs_fall | cs_s):
            # Transaction start, or CS deasserted / idle: reset all
            # per-transaction state so the first byte drives its MSB live.
            # `rearm` is cleared too: if the previous transaction ended after
            # its 8th falling edge but before the following rising edge was
            # seen, a stale `rearm` would otherwise re-arm in the middle of the
            # next transaction's first byte and skip a TX shift.
            m.d[d] += bit_ctr.eq(0)
            m.d[d] += armed.eq(1)
            m.d[d] += rearm.eq(0)

        with m.Else():
            # CS asserted: run bit engine
            with m.If(falling_clk):
                # Sample MOSI (MSB first: left-shift, new bit enters at LSB)
                # Cat(a, b) → a at lower bits; so Cat(mosi, rx[6:0]) = {rx[6:0], mosi}
                m.d[d] += rx_shift.eq(Cat(mosi_s, rx_shift[:-1]))

                with m.If(armed):
                    # First falling edge of this byte: master has just sampled
                    # the MSB (driven live above).  Latch tx_byte so the
                    # remaining 7 bits shift out of a stable register.
                    m.d[d] += tx_shift.eq(self.tx_byte)
                    m.d[d] += armed.eq(0)

                with m.If(bit_ctr == 7):
                    # 8th falling edge: byte complete.  The master samples the
                    # LSB on THIS edge, so MISO must still hold tx_shift[7].
                    # Defer arming to the next rising edge (rearm) so MISO is
                    # not switched to the next byte's live MSB too early.
                    m.d[d] += self.rx_byte.eq(Cat(mosi_s, rx_shift[:-1]))
                    m.d[d] += self.rx_valid.eq(1)
                    m.d[d] += bit_ctr.eq(0)
                    m.d[d] += self.tx_load.eq(1)    # advance source to next byte
                    m.d[d] += rearm.eq(1)           # arm on the next rising edge
                with m.Else():
                    m.d[d] += bit_ctr.eq(bit_ctr + 1)

            with m.If(rising_clk):
                with m.If(rearm):
                    # Byte boundary: arm for the next byte (live MSB drive).
                    m.d[d] += armed.eq(1)
                    m.d[d] += rearm.eq(0)
                with m.Elif(~armed):
                    # Shift left: next bit into MSB position
                    # Cat(0, tx[6:0]) = {tx[6:0], 0} — left shift
                    m.d[d] += tx_shift.eq(Cat(0, tx_shift[:-1]))

        return m
|
||||
|
||||
|
||||
# ── Testbench ───────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Self-test: bit-bangs SPI Mode 3 transactions into SPIMode3Slave (default
    # "capture" domain) and checks the received bytes and the MISO bit stream.
    # Exits with status 1 if any check fails.
    from amaranth.sim import Simulator, Period

    dut = SPIMode3Slave()

    # 4 capture-domain ticks per SPI half-period → well above the 3-cycle
    # (2 sync + 1 edge) latency.
    HALF = 4

    async def spi_send_byte(ctx, mosi_val, next_tx_byte=None):
        """Drive one SPI Mode 3 byte on MOSI; return the MISO byte assembled.

        Bits are sent MSB first.  MISO is sampled HALF ticks after each falling
        edge is applied to spi_clk.

        next_tx_byte: if given, written to tx_byte after the LAST falling edge
        (before the last rising edge), so it is already in place when the DUT
        re-arms on that rising edge and starts driving tx_byte[7] live.
        """
        miso_byte = 0
        for bit in range(7, -1, -1):
            ctx.set(dut.spi_mosi, (mosi_val >> bit) & 1)
            ctx.set(dut.spi_clk, 0)  # falling edge
            await ctx.tick("capture").repeat(HALF)
            miso_byte = (miso_byte << 1) | ctx.get(dut.spi_miso)
            # Set next TX byte here — after last fall, before rising edge.
            # The rising edge is detected 3 cycles after we assert clk=1,
            # so we have HALF ticks of margin.
            if bit == 0 and next_tx_byte is not None:
                ctx.set(dut.tx_byte, next_tx_byte)
            ctx.set(dut.spi_clk, 1)  # rising edge
            await ctx.tick("capture").repeat(HALF)
        return miso_byte

    errors = []

    async def testbench(ctx):
        # ── Test 1: Single byte TX/RX ──────────────────────────────────────
        ctx.set(dut.spi_cs_n, 0)
        ctx.set(dut.spi_clk, 1)
        ctx.set(dut.tx_byte, 0xA5)  # pre-load before CS fall is detected
        await ctx.tick("capture").repeat(HALF)

        miso = await spi_send_byte(ctx, 0x37)
        await ctx.tick("capture").repeat(2)
        rx = ctx.get(dut.rx_byte)

        ctx.set(dut.spi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)

        if rx != 0x37:
            errors.append(f"Test1 rx_byte: expected 0x37, got 0x{rx:02X}")
        if miso != 0xA5:
            errors.append(f"Test1 miso: expected 0xA5, got 0x{miso:02X}")
        print(f"Test1 – MOSI→rx_byte: 0x{rx:02X} MISO←tx_byte: 0x{miso:02X}")

        await ctx.tick("capture").repeat(HALF)

        # ── Test 2: Two-byte transaction; second byte supplied between bytes ─
        ctx.set(dut.spi_cs_n, 0)
        ctx.set(dut.tx_byte, 0xBE)  # first response byte
        await ctx.tick("capture").repeat(HALF)

        # Pass next_tx_byte=0xEF so it's set after last falling edge of byte 0,
        # i.e. before the DUT re-arms and drives the new byte's MSB live.
        miso0 = await spi_send_byte(ctx, 0x00, next_tx_byte=0xEF)
        miso1 = await spi_send_byte(ctx, 0xFF)

        await ctx.tick("capture").repeat(2)
        rx1 = ctx.get(dut.rx_byte)

        ctx.set(dut.spi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)

        if miso0 != 0xBE:
            errors.append(f"Test2 miso0: expected 0xBE, got 0x{miso0:02X}")
        if miso1 != 0xEF:
            errors.append(f"Test2 miso1: expected 0xEF, got 0x{miso1:02X}")
        if rx1 != 0xFF:
            errors.append(f"Test2 rx1: expected 0xFF, got 0x{rx1:02X}")
        print(f"Test2 – byte0 MISO: 0x{miso0:02X} byte1 MISO: 0x{miso1:02X} rx1: 0x{rx1:02X}")

        await ctx.tick("capture").repeat(HALF)

        # ── Test 3: MISO idles HIGH when CS deasserted ─────────────────────
        miso_idle = ctx.get(dut.spi_miso)
        if miso_idle != 1:
            errors.append(f"Test3 MISO idle: expected 1, got {miso_idle}")
        print(f"Test3 – MISO idle (CS=1): {miso_idle}")

        # ── Test 4: All-zeros byte (0x00) TX and RX ────────────────────────
        # (transmits 0x00 on MISO while receiving 0xFF on MOSI)
        ctx.set(dut.spi_cs_n, 0)
        ctx.set(dut.tx_byte, 0x00)
        await ctx.tick("capture").repeat(HALF)

        miso = await spi_send_byte(ctx, 0xFF)
        await ctx.tick("capture").repeat(2)
        rx = ctx.get(dut.rx_byte)
        ctx.set(dut.spi_cs_n, 1)
        await ctx.tick("capture").repeat(HALF)

        if miso != 0x00:
            errors.append(f"Test4 miso: expected 0x00, got 0x{miso:02X}")
        if rx != 0xFF:
            errors.append(f"Test4 rx: expected 0xFF, got 0x{rx:02X}")
        print(f"Test4 – 0x00 TX / 0xFF RX: MISO=0x{miso:02X} rx=0x{rx:02X}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=54), domain="capture")
    sim.add_testbench(testbench)

    with sim.write_vcd("SPIMode3Slave.vcd"):
        sim.run()

    # Report collected failures and signal them through the exit status.
    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        raise SystemExit(1)
    else:
        print("\nAll tests passed.")
|
||||
@@ -0,0 +1,276 @@
|
||||
"""SPRAM arbiter — sync domain (24 MHz).
|
||||
|
||||
Owns the iCE40UP5K 128 KB SPRAM (SB_SPRAM256KA, 16-bit wide) and arbitrates
|
||||
between two clients:
|
||||
|
||||
Client A (EXI read) : prefetch pipeline; low priority.
|
||||
Client B (ETH write): RXFrameAssembler; high priority.
|
||||
|
||||
ETH writes win when both clients are active. This is safe because the GC only
|
||||
reads pages that the ETH engine has already finished writing (ring-buffer
|
||||
invariant).
|
||||
|
||||
SPRAM addressing
|
||||
-----------------
|
||||
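Each SB_SPRAM256KA block is 16 K × 16-bit (256 Kbit = 32 KB, 14-bit word
address); the iCE40UP5K has four such blocks (128 KB in total). Byte addressing: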
|
||||
ADDRESS = byte_addr >> 1
|
||||
MASKWREN[3:0]:
|
||||
0b0011 → write lower byte (byte_addr even)
|
||||
0b1100 → write upper byte (byte_addr odd)
|
||||
Read: both bytes returned; pick the right one from DATAOUT based on addr bit 0.
|
||||
|
||||
Read latency: 1 synchronous cycle — result of cycle N is valid at N+1.
|
||||
|
||||
In simulation (platform is None) a behavioural Array model is used instead of
|
||||
the SB_SPRAM256KA Instance so tests run without IceStorm.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
from amaranth.lib.memory import Memory
|
||||
|
||||
__all__ = ["SPRAMArbiter"]
|
||||
|
||||
_SPRAM_WORDS = 65536 # 64 K 16-bit words = 128 KB
|
||||
|
||||
|
||||
class SPRAMArbiter(Elaboratable):
    """Arbitrated SPRAM controller in the sync domain.

    One access is issued per cycle: an ETH write if eth_wr_valid is high,
    otherwise an EXI read if exi_req_valid is high.  A read that is accepted
    in one cycle produces exi_rsp_valid (with the byte on exi_rsp_data) two
    clock edges later: one edge for the memory read, one for the response
    register.

    EXI read interface (from BBARegisterFile spram_req / spram_rsp FIFOs)
    ----------------------------------------------------------------------
    exi_req_addr  : 16-bit byte address to read
    exi_req_valid : FIFO r_rdy — a request is waiting
    exi_req_ready : FIFO r_en  — pop the request (asserted when serviced)
    exi_rsp_data  : 8-bit result byte
    exi_rsp_valid : FIFO w_en  — push result when valid

    ETH write interface (from RXFrameAssembler)
    -------------------------------------------
    eth_wr_addr   : 16-bit byte address to write
    eth_wr_data   : 8-bit byte value
    eth_wr_valid  : write request present
    eth_wr_ready  : write accepted this cycle (combinational; only asserted
                    while eth_wr_valid is high)
    """

    def __init__(self):
        # EXI read interface
        self.exi_req_addr = Signal(16)
        self.exi_req_valid = Signal()
        self.exi_req_ready = Signal()
        self.exi_rsp_data = Signal(8)
        self.exi_rsp_valid = Signal()

        # ETH write interface
        self.eth_wr_addr = Signal(16)
        self.eth_wr_data = Signal(8)
        self.eth_wr_valid = Signal()
        self.eth_wr_ready = Signal()

    def elaborate(self, platform):
        m = Module()

        # ── SPRAM instantiation (hardware vs simulation) ──────────────────
        # NOTE(review): the byte addresses are 16 bits wide, so addr[1:] is a
        # 15-bit word address, but spram_addr is only 14 bits; the assignment
        # below drops the top bit, so byte addresses >= 0x8000 alias onto the
        # lower half.  Confirm that no client uses addresses that high.
        spram_addr = Signal(14)  # word address (byte_addr >> 1)
        spram_din = Signal(16)
        spram_dout = Signal(16)
        spram_wren = Signal()
        spram_mask = Signal(4)  # MASKWREN

        if platform is None:
            # Behavioural model: synchronous read with 1-cycle latency.
            # Memory is a Component; read/write ports are obtained from it
            # and wired via its submodule ports (not added as separate submodules).
            mem = Memory(shape=16, depth=_SPRAM_WORDS, init=[])
            m.submodules.mem = mem
            mem_rd = mem.read_port(domain="sync", transparent_for=[])
            mem_wr = mem.write_port(domain="sync", granularity=8)

            # en[0] = lower byte enable, en[1] = upper byte enable
            # (each pair of MASKWREN bits is collapsed into one byte enable)
            byte0_en = Signal()
            byte1_en = Signal()
            m.d.comb += [
                byte0_en .eq(spram_wren & (spram_mask[0] | spram_mask[1])),
                byte1_en .eq(spram_wren & (spram_mask[2] | spram_mask[3])),
                mem_rd.addr .eq(spram_addr),
                mem_rd.en .eq(1),
                spram_dout .eq(mem_rd.data),
                mem_wr.addr .eq(spram_addr),
                mem_wr.data .eq(spram_din),
                mem_wr.en .eq(Cat(byte0_en, byte1_en)),
            ]
        else:
            # Hardware: a single SB_SPRAM256KA primitive is instantiated here,
            # always selected and kept out of standby / sleep.
            m.submodules.spram = Instance(
                "SB_SPRAM256KA",
                i_ADDRESS = spram_addr,
                i_DATAIN = spram_din,
                i_MASKWREN = spram_mask,
                i_WREN = spram_wren,
                i_CHIPSELECT = Const(1, 1),
                i_CLOCK = ClockSignal("sync"),
                i_STANDBY = Const(0, 1),
                i_SLEEP = Const(0, 1),
                i_POWEROFF = Const(1, 1),
                o_DATAOUT = spram_dout,
            )

        # ── Arbiter pipeline ─────────────────────────────────────────────
        # Stage 1: issue SPRAM address and control signals (combinatorial)
        # Stage 2: capture SPRAM output into rsp_buf (synchronous, 1-cycle)

        read_pending = Signal()  # a read address was issued last cycle
        read_was_odd = Signal()  # byte address bit 0 of the pending read
        rsp_buf = Signal(8)  # registered response byte; valid when exi_rsp_valid

        # Combinatorial defaults
        # (the If/Elif branches below override these for the winning client)
        m.d.comb += [
            spram_wren .eq(0),
            spram_mask .eq(0),
            spram_din .eq(0),
            spram_addr .eq(0),
            self.exi_req_ready.eq(0),
            self.eth_wr_ready .eq(0),
            self.exi_rsp_data .eq(rsp_buf),  # always sourced from registered buffer
        ]
        # Registered defaults
        # (both are single-cycle flags unless re-asserted further down)
        m.d.sync += [
            self.exi_rsp_valid.eq(0),
            read_pending .eq(0),
        ]

        # ETH write has priority
        with m.If(self.eth_wr_valid):
            m.d.comb += [
                spram_addr .eq(self.eth_wr_addr[1:]),
                spram_wren .eq(1),
                self.eth_wr_ready.eq(1),
            ]
            # Address bit 0 selects which half of the 16-bit word is written.
            with m.If(self.eth_wr_addr[0]):
                m.d.comb += [
                    spram_din [8:16].eq(self.eth_wr_data),
                    spram_mask .eq(0b1100),
                ]
            with m.Else():
                m.d.comb += [
                    spram_din [0:8].eq(self.eth_wr_data),
                    spram_mask .eq(0b0011),
                ]

        # EXI read (lower priority)
        with m.Elif(self.exi_req_valid):
            m.d.comb += [
                spram_addr .eq(self.exi_req_addr[1:]),
                self.exi_req_ready.eq(1),
            ]
            # Remember that a read is in flight and which byte lane it wants.
            m.d.sync += [
                read_pending.eq(1),
                read_was_odd.eq(self.exi_req_addr[0]),
            ]

        # Capture SPRAM output into registered buffer after 1-cycle latency
        with m.If(read_pending):
            with m.If(read_was_odd):
                m.d.sync += rsp_buf.eq(spram_dout[8:16])
            with m.Else():
                m.d.sync += rsp_buf.eq(spram_dout[0:8])
            m.d.sync += self.exi_rsp_valid.eq(1)

        return m
|
||||
|
||||
|
||||
# ── Testbench ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Self-test: runs SPRAMArbiter against its behavioural memory model
    # (platform is None in simulation) and checks byte write/read-back on an
    # even and an odd address, plus ETH-over-EXI priority.  Exits with status 1
    # if any check fails.
    import sys
    from amaranth.sim import Simulator, Period

    dut = SPRAMArbiter()
    errors = []

    async def testbench(ctx):
        await ctx.tick("sync").repeat(2)

        # T1: ETH write to even byte address 0x0100, then EXI read it back
        ctx.set(dut.eth_wr_addr, 0x0100)
        ctx.set(dut.eth_wr_data, 0xAB)
        ctx.set(dut.eth_wr_valid, 1)
        await ctx.tick("sync").repeat(1)
        # eth_wr_valid is still high here, so the combinational ready is too
        accepted = ctx.get(dut.eth_wr_ready)
        if not accepted:
            errors.append("T1 eth write not accepted")
        ctx.set(dut.eth_wr_valid, 0)
        await ctx.tick("sync").repeat(1)

        # Issue EXI read of the same address
        ctx.set(dut.exi_req_addr, 0x0100)
        ctx.set(dut.exi_req_valid, 1)
        await ctx.tick("sync").repeat(1)  # clock A: read issued, read_pending=1
        ctx.set(dut.exi_req_valid, 0)
        await ctx.tick("sync").repeat(1)  # clock B: SPRAM output captured, valid=1
        # Check HERE — exi_rsp_valid is 1 for exactly this one cycle

        rdata = ctx.get(dut.exi_rsp_data)
        rvalid = ctx.get(dut.exi_rsp_valid)
        if rdata != 0xAB:
            errors.append(f"T1 read back: expected 0xAB, got 0x{rdata:02X}")
        if not rvalid:
            errors.append("T1 exi_rsp_valid not set")
        print(f"T1 even addr read-back: data=0x{rdata:02X} valid={rvalid}")

        await ctx.tick("sync").repeat(2)

        # T2: ETH write to ODD byte address 0x0101, read back
        # (same word as T1, upper byte lane)
        ctx.set(dut.eth_wr_addr, 0x0101)
        ctx.set(dut.eth_wr_data, 0xCD)
        ctx.set(dut.eth_wr_valid, 1)
        await ctx.tick("sync").repeat(1)
        ctx.set(dut.eth_wr_valid, 0)
        await ctx.tick("sync").repeat(1)

        ctx.set(dut.exi_req_addr, 0x0101)
        ctx.set(dut.exi_req_valid, 1)
        await ctx.tick("sync").repeat(1)
        ctx.set(dut.exi_req_valid, 0)
        await ctx.tick("sync").repeat(1)

        rdata = ctx.get(dut.exi_rsp_data)
        if rdata != 0xCD:
            errors.append(f"T2 odd addr read-back: expected 0xCD, got 0x{rdata:02X}")
        print(f"T2 odd addr read-back: data=0x{rdata:02X}")

        await ctx.tick("sync").repeat(2)

        # T3: ETH write wins when both clients active simultaneously
        # Write 0xEE to 0x0200
        ctx.set(dut.eth_wr_addr, 0x0200)
        ctx.set(dut.eth_wr_data, 0xEE)
        ctx.set(dut.eth_wr_valid, 1)
        ctx.set(dut.exi_req_addr, 0x0100)  # also wants to read
        ctx.set(dut.exi_req_valid, 1)
        await ctx.tick("sync").repeat(1)

        # Both requests are still asserted: sample the arbitration outcome.
        eth_won = ctx.get(dut.eth_wr_ready)
        exi_blocked = not ctx.get(dut.exi_req_ready)
        ctx.set(dut.eth_wr_valid, 0)
        ctx.set(dut.exi_req_valid, 0)

        if not eth_won:
            errors.append("T3 ETH priority: ETH write not accepted")
        if not exi_blocked:
            errors.append("T3 ETH priority: EXI read was not blocked")
        print(f"T3 ETH priority: eth_won={eth_won} exi_blocked={exi_blocked}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)

    with sim.write_vcd("SPRAMArbiter.vcd"):
        sim.run()

    # Report collected failures and signal them through the exit status.
    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
    else:
        print("\nAll tests passed.")
|
||||
@@ -0,0 +1,227 @@
|
||||
"""StatusPanel — 5-LED / 3-button bring-up panel (sync domain).
|
||||
|
||||
A development/diagnostics front panel for the iCEbreaker LED+button PMOD. It
|
||||
turns the device's internal liveness signals into something you can watch on a
|
||||
real GameCube during bring-up, and gives three buttons for manual control.
|
||||
|
||||
LEDs (logical, active-high; set `led_active_low=True` if the board sinks current)
|
||||
led[0] heartbeat — ~1–2 Hz blink: clock alive, bitstream loaded
|
||||
led[1] exi_active — stretched `cs_active`: the GC is talking on EXI
|
||||
led[2] rx_act — stretched `rx_pulse`: a packet arrived from the net
|
||||
led[3] tx_act — stretched `tx_pulse`: a packet went out
|
||||
led[4] ready — `ready` level (e.g. ethernet init complete)
|
||||
|
||||
Buttons (raw pin level; `btn_active_low=True` for the usual pull-up wiring)
|
||||
btn[0] eth_rst — while held, drive `eth_rst_n` low (reset the ethernet chip)
|
||||
btn[1] reinit — on press, emit a one-cycle `reinit` pulse (force re-init)
|
||||
btn[2] freeze — toggle: latch the rx/tx activity LEDs so a single one-shot
|
||||
blink sticks until you unfreeze (catch a lone packet)
|
||||
|
||||
Single-cycle events (`rx_pulse`/`tx_pulse`) are stretched to ~`stretch_cycles`
|
||||
so the eye can see them; `cs_active` is a level that is re-triggered while high.
|
||||
Buttons are debounced (`debounce_cycles` stable samples) — same idea as
|
||||
`rebbarb/debouncer.py`, inlined here to keep this module self-contained.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
|
||||
__all__ = ["StatusPanel"]
|
||||
|
||||
|
||||
class StatusPanel(Elaboratable):
    """Bring-up status panel: five status LEDs and three debounced buttons.

    LED bit map (before the optional ``led_active_low`` inversion):
        led[0]  heartbeat — one bit of a free-running counter
        led[1]  ``cs_active`` — stays lit for ``stretch_cycles`` after it drops
        led[2]  ``rx_pulse`` — stretched; latched while frozen
        led[3]  ``tx_pulse`` — stretched; latched while frozen
        led[4]  ``ready`` — passed straight through

    Button map (after polarity normalisation and debounce):
        btn[0]  held    -> ``eth_rst_n`` driven low
        btn[1]  pressed -> one-cycle ``reinit`` pulse
        btn[2]  pressed -> toggles the internal freeze flag

    Parameters
    ----------
    hb_bit : int
        Index of the counter bit used as the heartbeat.  The bit completes a
        full on/off period every ``2**(hb_bit + 1)`` clocks, i.e. about
        1.4 Hz for ``hb_bit=23`` on a 24 MHz sync clock.
    stretch_cycles : int
        Number of sync clocks an activity LED stays lit after its trigger
        (1_440_000 clocks is 60 ms at 24 MHz).
    debounce_cycles : int
        Number of consecutive samples a button must differ from its debounced
        value before the debounced value follows it.
    led_active_low : bool
        When true, every bit of ``led`` is inverted on the way out.
    btn_active_low : bool
        When true, every bit of ``btn`` is inverted on the way in, so a low
        pin level counts as "pressed".
    """

    def __init__(self, hb_bit=23, stretch_cycles=1_440_000,
                 debounce_cycles=240_000, led_active_low=False,
                 btn_active_low=True):
        # Status inputs (sync domain)
        self.cs_active = Signal()   # level: EXI transaction in progress
        self.rx_pulse = Signal()    # 1-cycle: frame received
        self.tx_pulse = Signal()    # 1-cycle: frame sent
        self.ready = Signal()       # level: ethernet ready

        # Raw button inputs (from pins)
        self.btn = Signal(3)

        # Outputs
        self.led = Signal(5)
        self.eth_rst_n = Signal(init=1)   # low while btn0 is held
        self.reinit = Signal()            # 1-cycle pulse on a btn1 press

        # Elaboration-time configuration
        self._hb_bit = hb_bit
        self._stretch = stretch_cycles
        self._deb = debounce_cycles
        self._led_inv = led_active_low
        self._btn_inv = btn_active_low

    def elaborate(self, platform):
        m = Module()

        # ── Heartbeat: free-running counter, top bit drives led[0] ────────
        hb = Signal(self._hb_bit + 1)
        m.d.sync += hb.eq(hb + 1)

        # ── Buttons: normalise polarity so 1 always means "pressed" ───────
        # NOTE(review): self.btn is described as a raw pin level and feeds
        # the debounce logic without a synchroniser stage — confirm whether
        # one exists upstream.
        press_mask = 0b111 if self._btn_inv else 0
        braw = Signal(3)
        m.d.comb += braw.eq(self.btn ^ C(press_mask, 3))

        bdeb = Signal(3)

        def debounce(index):
            """Make bdeb[index] follow braw[index] after a stable run."""
            raw_bit, held_bit = braw[index], bdeb[index]
            count = Signal(range(self._deb + 1), name=f"deb_cnt{index}")
            with m.If(raw_bit != held_bit):
                # Input disagrees with the debounced value: count samples and
                # accept the new level once enough have been seen in a row.
                m.d.sync += count.eq(count + 1)
                with m.If(count == self._deb - 1):
                    m.d.sync += [held_bit.eq(raw_bit), count.eq(0)]
            with m.Else():
                # Input agrees again: restart the run.
                m.d.sync += count.eq(0)

        for index in range(3):
            debounce(index)

        def rising_edge(level, prev_name):
            """Return an expression that is high for the first cycle of `level`."""
            prev = Signal(name=prev_name)
            m.d.sync += prev.eq(level)
            return level & ~prev

        # btn0: held -> ethernet reset asserted (active-low output)
        m.d.comb += self.eth_rst_n.eq(~bdeb[0])

        # btn1: press -> single-cycle reinit pulse
        m.d.comb += self.reinit.eq(rising_edge(bdeb[1], "b1_prev"))

        # btn2: each press flips the freeze flag
        freeze = Signal()
        with m.If(rising_edge(bdeb[2], "b2_prev")):
            m.d.sync += freeze.eq(~freeze)

        # ── Stretchers ────────────────────────────────────────────────────
        def hold_timer(trigger, name):
            """Counter reloaded to stretch_cycles by `trigger`, else counting to 0."""
            timer = Signal(range(self._stretch + 1), name=name)
            with m.If(trigger):
                m.d.sync += timer.eq(self._stretch)
            with m.Elif(timer != 0):
                m.d.sync += timer.eq(timer - 1)
            return timer

        def activity_led(pulse, name):
            """Stretched `pulse`, additionally latched while freeze is set."""
            timer = hold_timer(pulse, f"{name}_cnt")
            sticky = Signal(name=f"{name}_sticky")
            with m.If(~freeze):
                m.d.sync += sticky.eq(0)    # unfrozen: drop any latched blink
            with m.Elif(pulse):
                m.d.sync += sticky.eq(1)    # frozen: latch this one-shot
            return (timer != 0) | sticky

        rx_led = activity_led(self.rx_pulse, "rx")
        tx_led = activity_led(self.tx_pulse, "tx")

        # cs_active is a level: the timer is reloaded every cycle it is high,
        # so the LED covers the transaction plus stretch_cycles afterwards.
        cs_led = hold_timer(self.cs_active, "cs_cnt") != 0

        # ── LED output, with optional polarity inversion ──────────────────
        led_mask = 0b11111 if self._led_inv else 0
        panel = Cat(hb[self._hb_bit], cs_led, rx_led, tx_led, self.ready)
        m.d.comb += self.led.eq(panel ^ C(led_mask, 5))

        return m
|
||||
|
||||
|
||||
# ── Testbench ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    # Shrunk timing parameters so every timed behaviour fits in a short run.
    dut = StatusPanel(hb_bit=3, stretch_cycles=8, debounce_cycles=3)
    errors = []

    async def settle(ctx, n=1):
        """Advance the simulation by `n` sync clock ticks."""
        await ctx.tick("sync").repeat(n)

    async def drive_buttons(ctx, pattern, cycles):
        """Apply a raw button pattern (active-low) and wait `cycles` ticks."""
        ctx.set(dut.btn, pattern)
        await settle(ctx, cycles)

    def led_bit(ctx, position):
        """Current value (0 or 1) of one bit of the LED output."""
        return (ctx.get(dut.led) >> position) & 1

    async def testbench(ctx):
        # Active-low idle: no button pressed.
        await drive_buttons(ctx, 0b111, 4)

        # T1: heartbeat toggles (bit 3 of the counter flips every 8 cycles)
        h0 = led_bit(ctx, 0)
        await settle(ctx, 8)
        h1 = led_bit(ctx, 0)
        if h0 == h1:
            errors.append("T1 heartbeat did not toggle over 8 cycles")
        print(f"T1 heartbeat toggled: {h0} -> {h1}")

        # T2: an rx pulse lights led[2], which stays lit, then clears
        ctx.set(dut.rx_pulse, 1)
        await settle(ctx, 1)
        ctx.set(dut.rx_pulse, 0)
        await settle(ctx, 1)
        on = led_bit(ctx, 2)
        if not on:
            errors.append("T2 rx LED not lit after pulse")
        await settle(ctx, 12)                   # longer than stretch_cycles
        off = led_bit(ctx, 2)
        if off:
            errors.append("T2 rx LED did not clear after stretch")
        print(f"T2 rx LED: on={on} then off={not off}")

        # T3: ready level shows up on led[4]
        ctx.set(dut.ready, 1)
        await settle(ctx, 1)
        if not led_bit(ctx, 4):
            errors.append("T3 ready LED not lit")
        ctx.set(dut.ready, 0)
        print("T3 ready LED follows level")

        # T4: holding btn0 (pin driven low) pulls eth_rst_n low after debounce
        await drive_buttons(ctx, 0b110, 6)      # btn0 pressed, > debounce
        if ctx.get(dut.eth_rst_n) != 0:
            errors.append("T4 eth_rst_n not asserted while btn0 held")
        await drive_buttons(ctx, 0b111, 6)      # release
        if ctx.get(dut.eth_rst_n) != 1:
            errors.append("T4 eth_rst_n not released")
        print("T4 btn0 → eth_rst_n hold/release ok")

        # T5: one btn1 press yields exactly one reinit pulse
        pulses = 0
        ctx.set(dut.btn, 0b101)                 # btn1 pressed
        for _ in range(10):
            await settle(ctx, 1)
            pulses += (ctx.get(dut.reinit) & 1)
        await drive_buttons(ctx, 0b111, 6)
        if pulses != 1:
            errors.append(f"T5 reinit pulses: got {pulses}, want 1")
        print(f"T5 btn1 → reinit pulses={pulses}")

        # T6: with freeze on (btn2), a single rx pulse stays lit
        await drive_buttons(ctx, 0b011, 6)      # btn2 press → freeze on
        await drive_buttons(ctx, 0b111, 2)
        ctx.set(dut.rx_pulse, 1)                # one-shot while frozen
        await settle(ctx, 1)
        ctx.set(dut.rx_pulse, 0)
        await settle(ctx, 20)                   # well past the stretch time
        stuck = led_bit(ctx, 2)
        if not stuck:
            errors.append("T6 frozen rx LED did not stick")
        await drive_buttons(ctx, 0b011, 6)      # btn2 press → freeze off
        await drive_buttons(ctx, 0b111, 2)
        cleared = led_bit(ctx, 2) == 0
        if not cleared:
            errors.append("T6 rx LED did not clear after unfreeze")
        print(f"T6 freeze: stuck={stuck} cleared_after_unfreeze={cleared}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    sim.run()

    if not errors:
        print("\nAll tests passed.")
    else:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
|
||||
@@ -0,0 +1,197 @@
|
||||
"""Synthesis script for BBATop → iCEbreaker (iCE40UP5K SG48).
|
||||
|
||||
Run from workspace root:
|
||||
python -m exi_bba.synth # synthesize only
|
||||
python -m exi_bba.synth --flash # synthesize and flash
|
||||
|
||||
This file re-declares IceBreakerPlatform inline so that importing
|
||||
rebbarb/rebbarb.py (which has a module-level platform.build() call) is avoided.
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from amaranth import *
|
||||
from amaranth.build import *
|
||||
from amaranth.vendor import LatticeICE40Platform
|
||||
|
||||
from exi_bba.bba_top import BBATop
|
||||
|
||||
|
||||
# ── Platform definition ───────────────────────────────────────────────────
|
||||
# Pin assignments use the iCEbreaker PMOD connectors as placeholders.
|
||||
# Replace with actual SP1-interposer pin numbers once PCB is finalised.
|
||||
#
|
||||
# PMOD1A (J2): pins 4 2 47 45 / 3 48 46 44 (top/bottom)
|
||||
# PMOD1B (J3): pins 43 38 34 31 / 42 36 32 28
|
||||
# PMOD2 (J4): pins 27 25 21 19 / 26 23 20 18
|
||||
#
|
||||
# EXI : CLK=4 MOSI=2 MISO=47 CS_N=45 INT_N=3 (PMOD1A)
|
||||
# W5100 : indirect parallel bus — 15 pins across PMOD1B + PMOD2.
|
||||
# ADDR[1:0]=43 38 DATA[7:0]=34 31 42 36 32 28 27 25
|
||||
# CS_N=21 RD_N=19 WR_N=26 INT_N=23 RST_N=20 (pin 18 free)
|
||||
# Board: tie the W5100's upper address lines A[14:2] to 0 (only A[1:0] wired);
|
||||
# DATA[7:0] is bidirectional (SB_IO tristate, single shared output-enable).
|
||||
|
||||
class IceBreakerPlatform(LatticeICE40Platform):
    """iCE40UP5K / SG48 platform carrying the EXI, W5100 and status-panel pins.

    Resources
    ---------
    clk12   12 MHz clock input on pin 35 (the default clock).
    exi     clk / mosi / cs_n inputs and miso / int_n outputs.
    w5100   2-bit address, 8-bit bidirectional data bus, cs_n / rd_n / wr_n /
            rst_n outputs and an int_n input.
    ledr, ledg, btn   two LED outputs and one button input.
    """

    device = "iCE40UP5K"
    package = "SG48"
    default_clk = "clk12"

    resources = [
        # Clock input, routed on a global buffer.
        Resource(
            "clk12", 0,
            Pins("35", dir="i"),
            Clock(12e6),
            Attrs(GLOBAL=True, IO_STANDARD="SB_LVCMOS"),
        ),

        # EXI interface (GC side, SPI Mode 3) — PMOD1A FPGA pins
        Resource(
            "exi", 0,
            Subsignal("clk", Pins("4", dir="i")),
            Subsignal("mosi", Pins("2", dir="i")),
            Subsignal("miso", Pins("47", dir="o")),
            Subsignal("cs_n", Pins("45", dir="i")),
            Subsignal("int_n", Pins("3", dir="o")),
            Attrs(IO_STANDARD="SB_LVCMOS"),
        ),

        # W5100 indirect parallel bus — PMOD1B + PMOD2 FPGA pins
        Resource(
            "w5100", 0,
            Subsignal("addr", Pins("43 38", dir="o")),
            Subsignal("data", Pins("34 31 42 36 32 28 27 25", dir="io")),
            Subsignal("cs_n", Pins("21", dir="o")),
            Subsignal("rd_n", Pins("19", dir="o")),
            Subsignal("wr_n", Pins("26", dir="o")),
            Subsignal("int_n", Pins("23", dir="i")),
            Subsignal("rst_n", Pins("20", dir="o")),
            Attrs(IO_STANDARD="SB_LVCMOS"),
        ),

        # Bring-up status panel → iCEbreaker onboard parts (dedicated pins,
        # not on any PMOD, so they coexist with EXI + W5100).  LEDR/LEDG are
        # active-low discrete LEDs; BTN_N is the user button.
        # The onboard RGB LED (pins 39/40/41) would need an SB_RGBA_DRV
        # instance wired to raw pads and is left as a future add-on.
        Resource("ledr", 0, Pins("11", dir="o"), Attrs(IO_STANDARD="SB_LVCMOS")),
        Resource("ledg", 0, Pins("37", dir="o"), Attrs(IO_STANDARD="SB_LVCMOS")),
        Resource("btn", 0, Pins("10", dir="i"), Attrs(IO_STANDARD="SB_LVCMOS")),
    ]

    connectors = []

    def toolchain_program(self, products, name):
        """Flash ``<name>.bin`` from the build products.

        The programmer executable comes from the ``ICEPROG`` environment
        variable and falls back to ``iceprog``.  ``subprocess.check_call``
        raises ``CalledProcessError`` when the programmer exits non-zero.
        """
        programmer = os.getenv("ICEPROG", "iceprog")
        with products.extract(f"{name}.bin") as bin_path:
            subprocess.check_call([programmer, bin_path])
|
||||
|
||||
|
||||
# ── BBATop with platform resource wiring ─────────────────────────────────
|
||||
|
||||
class BBATopSynth(BBATop):
    """BBATop plus the wiring between its ports and the platform's pins."""

    def elaborate(self, platform):
        """Elaborate BBATop, then attach pins when a platform is supplied.

        With ``platform=None`` the parent's module is returned untouched, so
        no pin resources are requested.
        """
        m = super().elaborate(platform)
        if platform is None:
            return m

        exi = platform.request("exi", 0)
        w5100 = platform.request("w5100", 0)

        # Pins → core
        m.d.comb += [
            self.exi_clk.eq(exi.clk.i),
            self.exi_mosi.eq(exi.mosi.i),
            self.exi_cs_n.eq(exi.cs_n.i),
            self.w5100_data_i.eq(w5100.data.i),
            self.w5100_int_n.eq(w5100.int_n.i),
        ]

        # Core → pins.  DATA[7:0] is bidirectional: a single output-enable
        # switches the whole byte between driving and reading.
        m.d.comb += [
            exi.miso.o.eq(self.exi_miso),
            exi.int_n.o.eq(self.int_n),
            w5100.addr.o.eq(self.w5100_addr),
            w5100.data.o.eq(self.w5100_data_o),
            w5100.data.oe.eq(self.w5100_data_oe),
            w5100.cs_n.o.eq(self.w5100_cs_n),
            w5100.rd_n.o.eq(self.w5100_rd_n),
            w5100.wr_n.o.eq(self.w5100_wr_n),
            w5100.rst_n.o.eq(self.w5100_rst_n),
        ]

        if not self._status_panel:
            return m

        # ── Bring-up status panel → onboard LEDs / button ─────────────────
        # LEDG shows panel LED 0 (heartbeat) and LEDR shows panel LED 1 (EXI
        # activity); both are inverted here because the LEDs are active-low.
        ledr = platform.request("ledr", 0)
        ledg = platform.request("ledg", 0)
        btn = platform.request("btn", 0)
        panel = self.panel_led

        # The single onboard button feeds panel btn[1] (manual re-init);
        # btn[0] and btn[2] are tied to 1, the active-low "released" level.
        released = C(1, 1)
        m.d.comb += [
            ledg.o.eq(~panel[0]),
            ledr.o.eq(~panel[1]),
            self.panel_btn.eq(Cat(released, btn.i, released)),
        ]

        return m
|
||||
|
||||
|
||||
# ── Entry point ───────────────────────────────────────────────────────────
|
||||
#
|
||||
# Seed sweep: nextpnr placement is stochastic. With ~22% LC utilisation
|
||||
# routing dominates timing, so different seeds can vary fmax by ±20%.
|
||||
# Pass --seeds N to try N seeds (default 1, i.e. seed 1 only).
|
||||
# The build directory is reused across seeds, so without --flash the artefact
# left in build/top.bin comes from the last seed tried; with --flash the best
# seed is rebuilt (and flashed), so build/top.bin then holds that seed's result.
|
||||
|
||||
def _seed_count(argv):
    """Return N from a ``--seeds N`` pair in `argv`; 1 when the flag is absent.

    Only the first ``--seeds`` occurrence is considered.  Exits with a usage
    message (``SystemExit``) when the value is missing or is not an integer,
    instead of surfacing a bare IndexError/ValueError traceback.
    """
    if "--seeds" not in argv:
        return 1
    position = argv.index("--seeds")
    try:
        return int(argv[position + 1])
    except (IndexError, ValueError):
        raise SystemExit("--seeds requires an integer argument, e.g. --seeds 4")


def _read_exi_fmax(paths):
    """Return the last EXI-clock "Max frequency" (MHz) found in `paths`.

    Each file is scanned line by line; the last matching line wins, so with
    several matches in one log the final one is reported.  Unreadable or
    missing files are skipped.  Returns 0.0 when nothing matches.
    """
    import re

    pattern = re.compile(r"Max frequency.*exi.*?:\s*([\d.]+)\s*MHz")
    fmax = 0.0
    for path in paths:
        try:
            with open(path) as report:
                for line in report:
                    hit = pattern.search(line)
                    if hit:
                        fmax = float(hit.group(1))
        except OSError:
            pass
    return fmax


if __name__ == "__main__":
    import glob

    do_flash = "--flash" in sys.argv
    n_seeds = _seed_count(sys.argv)

    platform = IceBreakerPlatform()
    print(f"Synthesizing BBATop for {platform.device}-{platform.package} "
          f"(do_program={do_flash}, seeds=1..{n_seeds})")

    best_seed = 1
    best_fmax = 0.0
    for seed in range(1, n_seeds + 1):
        print(f"\n{'='*60}")
        print(f" Seed {seed}/{n_seeds}")
        print(f"{'='*60}")
        opts = f"--opt-timing --seed {seed} --timing-allow-fail"

        # Remove timing reports left by an earlier seed (or an earlier run):
        # if this build dies before writing a new one, the old report must
        # not be credited to this seed.
        for stale in glob.glob("build/*.tim"):
            try:
                os.remove(stale)
            except OSError:
                pass

        try:
            platform.build(BBATopSynth(status_panel=True), do_program=False,
                           verbose=True, nextpnr_opts=opts)
        except Exception as exc:
            # nextpnr exits non-zero even with --timing-allow-fail on some
            # versions; treat as non-fatal timing failure.
            print(f" [seed {seed}] build exception (timing?): {exc}")

        # Parse fmax from the timing report(s) in build/ (if present).
        # sorted() keeps the scan order deterministic.
        fmax_exi = _read_exi_fmax(sorted(glob.glob("build/*.tim")))
        print(f" [seed {seed}] exi fmax extracted: {fmax_exi:.1f} MHz")
        if fmax_exi > best_fmax:
            best_fmax = fmax_exi
            best_seed = seed

    print(f"\nBest seed: {best_seed} exi fmax: {best_fmax:.1f} MHz")

    if do_flash:
        # The build directory holds the last seed's output, so rebuild with
        # the winning seed and let the platform program the board.
        print(f"\nFlashing with seed {best_seed}...")
        opts = f"--opt-timing --seed {best_seed} --timing-allow-fail"
        platform.build(BBATopSynth(status_panel=True), do_program=True,
                       verbose=True, nextpnr_opts=opts)

    print("Done.")
|
||||
@@ -0,0 +1,253 @@
|
||||
"""TX frame drain — sync domain (24 MHz).
|
||||
|
||||
Drains the tx_bytes AsyncFIFO (written by BBARegisterFile in the exi domain),
|
||||
forwards each byte to W5500SPIMaster with SOF/EOF framing, then pulses tx_irq
|
||||
to notify the GC that the transmit is complete.
|
||||
|
||||
Flow
|
||||
----
|
||||
1. Wait for the tx_ctrl FIFO to hold a length word (signals that a complete frame is queued).
|
||||
2. Pop the length from the tx_ctrl FIFO.
|
||||
3. Assert tx_sof on first byte, tx_eof on last byte, consuming tx_bytes FIFO.
|
||||
4. When W5500SPIMaster accepts the final byte: pulse tx_irq.
|
||||
|
||||
The tx_bytes AsyncFIFO (exi→sync, 8-bit, depth=16) and tx_ctrl FIFO (exi→sync,
|
||||
16-bit, depth=4) are instantiated in BBARegisterFile and their sync-domain read
|
||||
sides are exposed as ports wired here by BBATop.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
|
||||
__all__ = ["TXFrameDrain"]
|
||||
|
||||
|
||||
class TXFrameDrain(Elaboratable):
    """Drains BBA TX FIFOs and forwards frames to W5500SPIMaster.

    TX FIFO read interfaces (async FIFOs, sync-domain read side)
    ---------------------------------------------------------------
    tx_bytes_r_data / tx_bytes_r_en / tx_bytes_r_rdy : byte stream
    tx_ctrl_r_data  / tx_ctrl_r_en  / tx_ctrl_r_rdy  : 16-bit frame length

    W5500 streaming output (sync domain, to W5500SPIMaster)
    -------------------------------------------------------
    tx_data / tx_valid / tx_ready / tx_sof / tx_eof

    CDC output (sync→exi, via PulseSynchronizer in BBATop)
    -------------------------------------------------------
    tx_irq : 1-cycle pulse when the downstream master has accepted the last
             byte of a frame.  A frame whose length word is 0 carries no
             bytes; it is completed immediately with a tx_irq pulse and no
             tx_valid activity, so the FSM cannot get stuck waiting for data.
    """

    def __init__(self):
        # tx_bytes FIFO read side
        self.tx_bytes_r_data = Signal(8)
        self.tx_bytes_r_en = Signal()
        self.tx_bytes_r_rdy = Signal()

        # tx_ctrl FIFO read side (frame length)
        self.tx_ctrl_r_data = Signal(16)
        self.tx_ctrl_r_en = Signal()
        self.tx_ctrl_r_rdy = Signal()

        # W5500 streaming TX interface
        self.tx_data = Signal(8)
        self.tx_valid = Signal()
        self.tx_ready = Signal()
        self.tx_sof = Signal()
        self.tx_eof = Signal()

        # TX done pulse → PulseSynchronizer
        self.tx_irq = Signal()

    def elaborate(self, platform):
        m = Module()

        frame_len = Signal(16)      # bytes still to LOAD from FIFO (incl. held one)
        is_first = Signal()         # next byte loaded is the first (SOF)
        load_pending = Signal()     # 1-bit "more bytes to load" flag (replaces
                                    # a 16-bit frame_len != 0 compare in the
                                    # combinational FIFO read-enable path)

        # ── Registered holding stage presented to W5500 ──────────────────
        # All W5500-facing outputs are driven from these registers, so the
        # FIFO read-enable depends only on the registered hold_valid /
        # load_pending flags and the FIFO's own r_rdy — tx_ready never
        # reaches the FIFO read pointer combinationally.
        hold_data = Signal(8)
        hold_valid = Signal()
        hold_sof = Signal()
        hold_eof = Signal()

        m.d.sync += self.tx_irq.eq(0)       # default; overridden for one cycle

        m.d.comb += [
            self.tx_data.eq(hold_data),
            self.tx_valid.eq(hold_valid),
            self.tx_sof.eq(hold_sof),
            self.tx_eof.eq(hold_eof),
        ]

        # The downstream master took the currently-held byte this cycle.
        hold_consumed = Signal()
        m.d.comb += hold_consumed.eq(hold_valid & self.tx_ready)

        # FIFO read-enable defaults (combinational, no W5500 dependency)
        m.d.comb += self.tx_bytes_r_en.eq(0)
        m.d.comb += self.tx_ctrl_r_en.eq(0)

        with m.FSM(domain="sync", name="tx_fsm"):

            with m.State("IDLE"):
                # Wait for a complete frame length in the tx_ctrl FIFO.
                with m.If(self.tx_ctrl_r_rdy):
                    m.d.comb += self.tx_ctrl_r_en.eq(1)
                    m.d.sync += frame_len.eq(self.tx_ctrl_r_data)
                    m.d.sync += is_first.eq(1)
                    # A frame with length 0 has nothing to load.
                    m.d.sync += load_pending.eq(self.tx_ctrl_r_data != 0)
                    m.next = "DRAIN"

            with m.State("DRAIN"):
                # Load the next byte into the holding register only when it
                # is empty.  This costs one idle sync cycle per byte and
                # keeps tx_ready off the FIFO read-enable path entirely.
                do_load = Signal()
                m.d.comb += do_load.eq(
                    ~hold_valid & self.tx_bytes_r_rdy & load_pending
                )
                m.d.comb += self.tx_bytes_r_en.eq(do_load)

                # Empty-frame guard.  For a non-empty frame either
                # load_pending or hold_valid is set throughout DRAIN (the
                # last load sets hold_valid in the cycle it clears
                # load_pending, and the last consume leaves the state), so
                # both being clear can only mean the length word was 0.
                # Without this the FSM would sit in DRAIN forever and block
                # every later frame.  Built from two registered flags only,
                # so no wide compare is added to the state logic.
                # NOTE(review): pulsing tx_irq here treats an empty frame as
                # "transmit complete" — confirm this is what the EXI side
                # expects, or drop the pulse to discard it silently.
                with m.If(~load_pending & ~hold_valid):
                    m.d.sync += self.tx_irq.eq(1)
                    m.next = "IDLE"

                with m.If(hold_consumed):
                    m.d.sync += hold_valid.eq(0)
                    with m.If(hold_eof):
                        m.d.sync += self.tx_irq.eq(1)
                        m.next = "IDLE"

                with m.If(do_load):
                    m.d.sync += hold_data.eq(self.tx_bytes_r_data)
                    m.d.sync += hold_valid.eq(1)
                    m.d.sync += hold_sof.eq(is_first)
                    m.d.sync += hold_eof.eq(frame_len == 1)
                    m.d.sync += is_first.eq(0)
                    m.d.sync += frame_len.eq(frame_len - 1)
                    # Last byte just loaded → stop further loads (registered).
                    with m.If(frame_len == 1):
                        m.d.sync += load_pending.eq(0)

        return m
|
||||
|
||||
|
||||
# ── Testbench ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    import sys
    from amaranth.sim import Simulator, Period

    dut = TXFrameDrain()
    errors = []

    async def _send_frame(ctx, frame):
        """Drive one non-empty frame through the TXFrameDrain DUT.

        Returns (received_bytes, seen_sof, seen_eof, saw_irq).

        The testbench plays both FIFOs: it presents the length word, then
        keeps tx_bytes_r_data pointing at the next byte the DUT has not yet
        shown on its output.  tx_data/tx_sof/tx_eof come from the DUT's
        holding registers, so they are sampled before each tick, while the
        byte is still being presented.

        The DUT loads a byte in one cycle and hands it over in the next, so
        a frame needs about two sync cycles per byte; the poll limit below
        is sized for that (the previous ``len(frame) + 10`` bound gave up
        early on frames longer than 10 bytes).
        """
        ctx.set(dut.tx_ctrl_r_data, len(frame))
        ctx.set(dut.tx_ctrl_r_rdy, 1)
        ctx.set(dut.tx_bytes_r_data, frame[0])
        ctx.set(dut.tx_bytes_r_rdy, 1)

        # Tick 0: IDLE pops ctrl word (comb), FSM→DRAIN, frame_len registered
        await ctx.tick("sync").repeat(1)
        # Deassert ctrl FIFO so FSM doesn't re-pop when it returns to IDLE
        ctx.set(dut.tx_ctrl_r_rdy, 0)

        received = []
        seen_sof = False
        seen_eof = False
        saw_irq = False

        for _ in range(2 * len(frame) + 10):
            # Sample the held byte and its framing flags before the tick
            # that consumes it.
            if ctx.get(dut.tx_valid):
                d = ctx.get(dut.tx_data)
                sof = ctx.get(dut.tx_sof)
                eof = ctx.get(dut.tx_eof)
                received.append(d)
                seen_sof = seen_sof or sof
                seen_eof = seen_eof or eof

            await ctx.tick("sync").repeat(1)

            if ctx.get(dut.tx_irq):
                saw_irq = True
                break

            # Advance FIFO AFTER the tick: present next byte for next tick
            if len(received) < len(frame):
                ctx.set(dut.tx_bytes_r_data, frame[len(received)])
            elif len(received) == len(frame):
                ctx.set(dut.tx_bytes_r_rdy, 0)

        return received, seen_sof, seen_eof, saw_irq

    async def testbench(ctx):
        await ctx.tick("sync").repeat(2)
        ctx.set(dut.tx_ready, 1)    # downstream always ready

        # ── T1: 4-byte frame ─────────────────────────────────────────────
        frame = [0xDE, 0xAD, 0xBE, 0xEF]
        received, seen_sof, seen_eof, saw_irq = await _send_frame(ctx, frame)

        print(f"T1 received={[hex(b) for b in received]} sof={seen_sof} eof={seen_eof} tx_irq={saw_irq}")

        if received != frame:
            errors.append(f"T1 bytes mismatch: got {received}, want {frame}")
        if not seen_sof:
            errors.append("T1: SOF never seen")
        if not seen_eof:
            errors.append("T1: EOF never seen")
        if not saw_irq:
            errors.append("T1: tx_irq never pulsed")

        await ctx.tick("sync").repeat(4)

        # ── T2: Single-byte frame — SOF and EOF on same byte ─────────────
        frame2 = [0x42]
        received2, s2_sof, s2_eof, s2_irq = await _send_frame(ctx, frame2)

        print(f"T2 byte=0x{received2[0] if received2 else 0:02X} sof={s2_sof} eof={s2_eof} tx_irq={s2_irq}")

        if received2 != frame2:
            errors.append(f"T2: bytes wrong, got {received2}")
        if not (s2_sof and s2_eof):
            errors.append("T2: SOF+EOF both must be set for 1-byte frame")
        if not s2_irq:
            errors.append("T2: tx_irq not seen for 1-byte frame")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)

    with sim.write_vcd("TXFrameDrain.vcd"):
        sim.run()

    if errors:
        print("\nFAILURES:")
        for e in errors:
            print(" ", e)
        sys.exit(1)
    else:
        print("\nAll tests passed.")
|
||||
@@ -0,0 +1,840 @@
|
||||
"""W5100 parallel-bus master — sync domain.
|
||||
|
||||
A drop-in alternative to `W5500SPIMaster` that talks to a WIZnet **W5100** over
|
||||
its **indirect parallel bus** instead of SPI. The external streaming interface
|
||||
(init_req/init_done/par, tx_*, rx_*) is identical, so BBATop wiring is unchanged;
|
||||
only the physical pins differ (a parallel bus instead of 4 SPI wires).
|
||||
|
||||
Why parallel
|
||||
------------
|
||||
SPI serialises 8 bits per byte, so on this UP5K (whose W5500-operating logic
|
||||
closes only ~40 MHz) the SPI byte rate caps at ~12 Mbit/s. A parallel bus moves
|
||||
a whole byte per access, so the same ~24 MHz sync logic clears the 27 Mbit/s EXI
|
||||
ceiling — the real hard limit — with margin. See CLAUDE.md.
|
||||
|
||||
W5100 indirect bus interface (IDM)
|
||||
----------------------------------
|
||||
Only two address lines A[1:0] are wired (the upper address lines are tied to 0
|
||||
on the board, so a power-up *direct*-mode access at A=00 still lands on MR):
|
||||
|
||||
A[1:0] register
|
||||
00 MR (Mode Register — also reachable directly at power-up)
|
||||
01 IDM_AR0 (indirect address, high byte)
|
||||
10 IDM_AR1 (indirect address, low byte)
|
||||
11 IDM_DR (indirect data — accesses mem[IDM_AR]; auto-increments
|
||||
IDM_AR when MR.AI is set)
|
||||
|
||||
So a register/buffer access is: write IDM_AR0/AR1 with the 16-bit address, then
|
||||
read/write IDM_DR. With MR.AI=1 a multi-byte block is one address-set followed
|
||||
by a burst of IDM_DR accesses (the chip auto-increments) — used for SHAR and for
|
||||
streaming frame data.
|
||||
|
||||
A bus cycle drives A + (for writes) D with /CS and /RD or /WR asserted for
|
||||
`strobe_cycles` sync clocks (≥ the W5100's ~80 ns access time at 24 MHz).
|
||||
|
||||
Phase status
|
||||
------------
|
||||
Phase 1 (this file): bus access engine + transaction engine + init sequence,
|
||||
verified against a W5100 bus model. TX/RX MACRAW (with socket-buffer ring
|
||||
wraparound) land in phases 2–3.
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
|
||||
__all__ = ["W5100ParallelMaster"]
|
||||
|
||||
# ── W5100 register addresses (indirect 16-bit address space) ────────────────
# Common register block
_MR = 0x0000                    # Mode register
_SHAR0 = 0x0009                 # Source MAC, first of 6 bytes
_IR = 0x0015                    # Interrupt register
_IMR = 0x0016                   # Interrupt mask
_RMSR = 0x001A                  # RX memory size (2 bits per socket)
_TMSR = 0x001B                  # TX memory size

# Socket 0 register block, written as offsets from its base address
_S0_BASE = 0x0400
_S0_MR = _S0_BASE + 0x00        # Socket 0 mode
_S0_CR = _S0_BASE + 0x01        # Socket 0 command
_S0_IR = _S0_BASE + 0x02        # Socket 0 interrupt
_S0_SR = _S0_BASE + 0x03        # Socket 0 status
_S0_TX_FSR = _S0_BASE + 0x20    # Socket 0 TX free size (2 bytes)
_S0_TX_RD = _S0_BASE + 0x22     # Socket 0 TX read pointer
_S0_TX_WR = _S0_BASE + 0x24     # Socket 0 TX write pointer
_S0_RX_RSR = _S0_BASE + 0x26    # Socket 0 RX received size (2 bytes)
_S0_RX_RD = _S0_BASE + 0x28     # Socket 0 RX read pointer

# Socket 0 buffer windows (default 2 KB each) and their ring masks
_TX_BASE = 0x4000
_RX_BASE = 0x6000
_S0_TX_MASK = 2 * 1024 - 1
_S0_RX_MASK = 2 * 1024 - 1

# MR bits / command / mode values
_MR_RST = 1 << 7                # software reset
_MR_AI = 1 << 1                 # address auto-increment (indirect mode)
_MR_IND = 1 << 0                # indirect bus interface mode
_S0_MR_MACRAW = 0x04
_CR_OPEN = 0x01
_CR_SEND = 0x20
_CR_RECV = 0x40

# Indirect-mode address selects, driven on A[1:0]
_A_MR, _A_AR0, _A_AR1, _A_DR = range(4)     # MR, IDM_AR high, IDM_AR low, IDM_DR
|
||||
|
||||
|
||||
class W5100ParallelMaster(Elaboratable):
|
||||
"""W5100 master over the indirect parallel bus, sync clock domain.
|
||||
|
||||
Physical bus pins
|
||||
-----------------
|
||||
bus_addr : A[1:0] output
|
||||
bus_data_o : D[7:0] output value (drive when bus_data_oe=1)
|
||||
bus_data_oe: data-bus output enable (1=FPGA drives D, 0=W5100 drives D)
|
||||
bus_data_i : D[7:0] input value (sampled during reads)
|
||||
cs_n / rd_n / wr_n : bus control (active low)
|
||||
w5100_int_n : W5100 INT_N input (active low)
|
||||
w5100_rst_n : W5100 hardware reset (active low)
|
||||
|
||||
Init / TX / RX interfaces are identical to W5500SPIMaster.
|
||||
"""
|
||||
|
||||
def __init__(self, strobe_cycles=3, reset_cycles=24000):
|
||||
# /RD//WR strobe width in sync cycles (≥ W5100 access time).
|
||||
self._strobe = strobe_cycles
|
||||
# MR-reset settle wait; testbench overrides with a small value.
|
||||
self._reset_cycles = reset_cycles
|
||||
|
||||
# Physical parallel bus
|
||||
self.bus_addr = Signal(2)
|
||||
self.bus_data_o = Signal(8)
|
||||
self.bus_data_oe = Signal()
|
||||
self.bus_data_i = Signal(8)
|
||||
self.cs_n = Signal(init=1)
|
||||
self.rd_n = Signal(init=1)
|
||||
self.wr_n = Signal(init=1)
|
||||
self.w5100_int_n = Signal(init=1)
|
||||
self.w5100_rst_n = Signal(init=1)
|
||||
|
||||
# Init control
|
||||
self.init_req = Signal()
|
||||
self.init_done = Signal()
|
||||
self.par = Signal(48) # MAC address (PAR0..5 packed)
|
||||
|
||||
# TX stream
|
||||
self.tx_data = Signal(8)
|
||||
self.tx_valid = Signal()
|
||||
self.tx_ready = Signal()
|
||||
self.tx_sof = Signal()
|
||||
self.tx_eof = Signal()
|
||||
|
||||
# RX stream
|
||||
self.rx_data = Signal(8)
|
||||
self.rx_valid = Signal()
|
||||
self.rx_ready = Signal()
|
||||
self.rx_sof = Signal()
|
||||
self.rx_eof = Signal()
|
||||
|
||||
def elaborate(self, platform):
|
||||
m = Module()
|
||||
STROBE = self._strobe
|
||||
|
||||
# ── Bus access engine: one indirect-bus read or write cycle ──────────
|
||||
bus_go = Signal()
|
||||
bus_rw = Signal() # 1 = write, 0 = read
|
||||
bus_a = Signal(2)
|
||||
bus_wdata = Signal(8)
|
||||
bus_rdata = Signal(8)
|
||||
bus_done = Signal()
|
||||
bus_ctr = Signal(range(STROBE + 2))
|
||||
rw_r = Signal()
|
||||
|
||||
# registered physical outputs
|
||||
a_o = Signal(2)
|
||||
d_o = Signal(8)
|
||||
d_oe = Signal()
|
||||
cs_r = Signal(init=1)
|
||||
rd_r = Signal(init=1)
|
||||
wr_r = Signal(init=1)
|
||||
m.d.comb += [
|
||||
self.bus_addr .eq(a_o),
|
||||
self.bus_data_o .eq(d_o),
|
||||
self.bus_data_oe.eq(d_oe),
|
||||
self.cs_n .eq(cs_r),
|
||||
self.rd_n .eq(rd_r),
|
||||
self.wr_n .eq(wr_r),
|
||||
]
|
||||
|
||||
m.d.sync += bus_done.eq(0)
|
||||
with m.FSM(domain="sync", name="bus_fsm"):
|
||||
with m.State("IDLE"):
|
||||
m.d.sync += [cs_r.eq(1), rd_r.eq(1), wr_r.eq(1), d_oe.eq(0)]
|
||||
with m.If(bus_go):
|
||||
m.d.sync += [a_o.eq(bus_a), rw_r.eq(bus_rw),
|
||||
cs_r.eq(0), bus_ctr.eq(0)]
|
||||
with m.If(bus_rw):
|
||||
m.d.sync += [d_o.eq(bus_wdata), d_oe.eq(1), wr_r.eq(0)]
|
||||
with m.Else():
|
||||
m.d.sync += rd_r.eq(0)
|
||||
m.next = "STROBE"
|
||||
with m.State("STROBE"):
|
||||
m.d.sync += bus_ctr.eq(bus_ctr + 1)
|
||||
with m.If(bus_ctr == STROBE - 1):
|
||||
with m.If(~rw_r):
|
||||
m.d.sync += bus_rdata.eq(self.bus_data_i) # sample read
|
||||
m.d.sync += [rd_r.eq(1), wr_r.eq(1)]
|
||||
m.next = "FINISH"
|
||||
with m.State("FINISH"):
|
||||
m.d.sync += [cs_r.eq(1), d_oe.eq(0), bus_done.eq(1)]
|
||||
m.next = "IDLE"
|
||||
|
||||
# ── Transaction engine: address-set + payload over the bus engine ────
|
||||
WBUF = 8
|
||||
xfer_start = Signal()
|
||||
xfer_direct = Signal() # 1 = single A=00 access (MR), addr ignored
|
||||
xfer_addr = Signal(16)
|
||||
xfer_rw = Signal() # payload direction: 1=write, 0=read
|
||||
xfer_len = Signal(range(WBUF + 1))
|
||||
xfer_stream = Signal() # stream-write payload from s_*
|
||||
xfer_sread = Signal() # stream-read payload to r_*
|
||||
xfer_rcount = Signal(16)
|
||||
xfer_done = Signal()
|
||||
|
||||
wbuf = Array([Signal(8, name=f"wbuf{i}") for i in range(WBUF)])
|
||||
rbuf = Array([Signal(8, name=f"rbuf{i}") for i in range(WBUF)])
|
||||
s_count = Signal(16) # bytes streamed-written (advances pointers)
|
||||
xfer_idx = Signal(range(WBUF + 1))
|
||||
s_last_r = Signal()
|
||||
r_idx = Signal(16)
|
||||
|
||||
# Streaming payload interfaces.
|
||||
s_data, s_valid, s_last, s_consume = Signal(8), Signal(), Signal(), Signal()
|
||||
r_data, r_valid, r_first, r_last, r_ready = (
|
||||
Signal(8), Signal(), Signal(), Signal(), Signal())
|
||||
# TX stream source = external tx interface (Phase 2).
|
||||
m.d.comb += [s_data.eq(self.tx_data), s_valid.eq(self.tx_valid),
|
||||
s_last.eq(self.tx_eof), self.tx_ready.eq(s_consume)]
|
||||
# RX stream sink = external rx interface (Phase 3).
|
||||
m.d.comb += [self.rx_data.eq(r_data), self.rx_valid.eq(r_valid),
|
||||
self.rx_sof.eq(r_first), self.rx_eof.eq(r_last),
|
||||
r_ready.eq(self.rx_ready)]
|
||||
|
||||
# Socket-buffer ring wraparound. Unlike the W5500, the W5100's IDM
|
||||
# address does NOT auto-wrap at the socket-buffer boundary — it just
|
||||
# increments linearly into the next region. So when a streamed access
|
||||
# reaches `xfer_wend`, the engine re-sets IDM_AR back to `xfer_wbase`.
|
||||
xfer_wrap = Signal()
|
||||
xfer_wbase = Signal(16)
|
||||
xfer_wend = Signal(16)
|
||||
cur_addr = Signal(16)
|
||||
|
||||
m.d.comb += [bus_go.eq(0), bus_rw.eq(0), bus_a.eq(0), bus_wdata.eq(0)]
|
||||
m.d.comb += [s_consume.eq(0), r_valid.eq(0), r_data.eq(0),
|
||||
r_first.eq(0), r_last.eq(0)]
|
||||
m.d.sync += xfer_done.eq(0)
|
||||
|
||||
def bus_write(a, data):
    """Request a bus write of `data` at bus address `a`.

    Adds combinational assignments (in whatever FSM state this is called
    from) that raise bus_go, select the write direction (bus_rw = 1) and
    present the address and data, overriding the all-zero defaults.
    """
    m.d.comb += bus_go.eq(1)
    m.d.comb += bus_rw.eq(1)
    m.d.comb += bus_a.eq(a)
    m.d.comb += bus_wdata.eq(data)
|
||||
|
||||
def bus_read(a):
    """Request a bus read at bus address `a`.

    Adds combinational assignments (in whatever FSM state this is called
    from) that raise bus_go, select the read direction (bus_rw = 0) and
    present the address; bus_wdata keeps its default.
    """
    m.d.comb += bus_go.eq(1)
    m.d.comb += bus_rw.eq(0)
    m.d.comb += bus_a.eq(a)
|
||||
|
||||
with m.FSM(domain="sync", name="xfer_fsm"):
|
||||
with m.State("IDLE"):
|
||||
with m.If(xfer_start):
|
||||
m.d.sync += [xfer_idx.eq(0), s_count.eq(0), r_idx.eq(0),
|
||||
cur_addr.eq(xfer_addr)]
|
||||
with m.If(xfer_direct):
|
||||
m.next = "DIRECT"
|
||||
with m.Else():
|
||||
m.next = "AR_HI"
|
||||
|
||||
# Direct MR write (A=00)
|
||||
with m.State("DIRECT"):
|
||||
bus_write(_A_MR, wbuf[0])
|
||||
m.next = "DIRECT_W"
|
||||
with m.State("DIRECT_W"):
|
||||
with m.If(bus_done):
|
||||
m.next = "FINISH"
|
||||
|
||||
# Set indirect address IDM_AR (high then low)
|
||||
with m.State("AR_HI"):
|
||||
bus_write(_A_AR0, xfer_addr[8:16])
|
||||
m.next = "AR_HI_W"
|
||||
with m.State("AR_HI_W"):
|
||||
with m.If(bus_done):
|
||||
m.next = "AR_LO"
|
||||
with m.State("AR_LO"):
|
||||
bus_write(_A_AR1, xfer_addr[0:8])
|
||||
m.next = "AR_LO_W"
|
||||
with m.State("AR_LO_W"):
|
||||
with m.If(bus_done):
|
||||
with m.If(xfer_stream):
|
||||
m.next = "SW_LOAD"
|
||||
with m.Elif(xfer_sread):
|
||||
m.next = "SR_LOAD"
|
||||
with m.Elif(xfer_rw):
|
||||
m.next = "WB_ISSUE"
|
||||
with m.Else():
|
||||
m.next = "RB_ISSUE"
|
||||
|
||||
# Fixed-length write from wbuf (IDM_DR burst, auto-increment)
|
||||
with m.State("WB_ISSUE"):
|
||||
bus_write(_A_DR, wbuf[xfer_idx])
|
||||
m.next = "WB_WAIT"
|
||||
with m.State("WB_WAIT"):
|
||||
with m.If(bus_done):
|
||||
m.d.sync += xfer_idx.eq(xfer_idx + 1)
|
||||
with m.If(xfer_idx + 1 == xfer_len):
|
||||
m.next = "FINISH"
|
||||
with m.Else():
|
||||
m.next = "WB_ISSUE"
|
||||
|
||||
# Fixed-length read into rbuf (with ring wrap, for the length header)
|
||||
with m.State("RB_ISSUE"):
|
||||
with m.If(xfer_wrap & (cur_addr == xfer_wend)):
|
||||
m.next = "RB_WRAP_HI"
|
||||
with m.Else():
|
||||
bus_read(_A_DR)
|
||||
m.next = "RB_WAIT"
|
||||
with m.State("RB_WAIT"):
|
||||
with m.If(bus_done):
|
||||
m.d.sync += rbuf[xfer_idx].eq(bus_rdata)
|
||||
m.d.sync += [xfer_idx.eq(xfer_idx + 1), cur_addr.eq(cur_addr + 1)]
|
||||
with m.If(xfer_idx + 1 == xfer_len):
|
||||
m.next = "FINISH"
|
||||
with m.Else():
|
||||
m.next = "RB_ISSUE"
|
||||
with m.State("RB_WRAP_HI"):
|
||||
bus_write(_A_AR0, xfer_wbase[8:16])
|
||||
m.next = "RB_WRAP_HI_W"
|
||||
with m.State("RB_WRAP_HI_W"):
|
||||
with m.If(bus_done):
|
||||
m.next = "RB_WRAP_LO"
|
||||
with m.State("RB_WRAP_LO"):
|
||||
bus_write(_A_AR1, xfer_wbase[0:8])
|
||||
m.next = "RB_WRAP_LO_W"
|
||||
with m.State("RB_WRAP_LO_W"):
|
||||
with m.If(bus_done):
|
||||
m.d.sync += cur_addr.eq(xfer_wbase)
|
||||
m.next = "RB_ISSUE"
|
||||
|
||||
# Stream-write payload from s_* until s_last (with ring wrap)
|
||||
with m.State("SW_LOAD"):
|
||||
with m.If(xfer_wrap & (cur_addr == xfer_wend)):
|
||||
m.next = "SW_WRAP_HI"
|
||||
with m.Elif(s_valid):
|
||||
bus_write(_A_DR, s_data)
|
||||
m.d.sync += s_last_r.eq(s_last)
|
||||
m.next = "SW_WAIT"
|
||||
with m.State("SW_WAIT"):
|
||||
with m.If(bus_done):
|
||||
m.d.comb += s_consume.eq(1)
|
||||
m.d.sync += [s_count.eq(s_count + 1), cur_addr.eq(cur_addr + 1)]
|
||||
with m.If(s_last_r):
|
||||
m.next = "FINISH"
|
||||
with m.Else():
|
||||
m.next = "SW_LOAD"
|
||||
with m.State("SW_WRAP_HI"):
|
||||
bus_write(_A_AR0, xfer_wbase[8:16])
|
||||
m.next = "SW_WRAP_HI_W"
|
||||
with m.State("SW_WRAP_HI_W"):
|
||||
with m.If(bus_done):
|
||||
m.next = "SW_WRAP_LO"
|
||||
with m.State("SW_WRAP_LO"):
|
||||
bus_write(_A_AR1, xfer_wbase[0:8])
|
||||
m.next = "SW_WRAP_LO_W"
|
||||
with m.State("SW_WRAP_LO_W"):
|
||||
with m.If(bus_done):
|
||||
m.d.sync += cur_addr.eq(xfer_wbase)
|
||||
m.next = "SW_LOAD"
|
||||
|
||||
# Stream-read payload to r_* for rcount bytes (with ring wrap)
|
||||
with m.State("SR_LOAD"):
|
||||
with m.If(r_idx == xfer_rcount):
|
||||
m.next = "FINISH"
|
||||
with m.Elif(xfer_wrap & (cur_addr == xfer_wend)):
|
||||
m.next = "SR_WRAP_HI"
|
||||
with m.Else():
|
||||
bus_read(_A_DR)
|
||||
m.next = "SR_WAIT"
|
||||
with m.State("SR_WAIT"):
|
||||
with m.If(bus_done):
|
||||
m.next = "SR_PUSH"
|
||||
with m.State("SR_PUSH"):
|
||||
m.d.comb += [r_data.eq(bus_rdata), r_valid.eq(1),
|
||||
r_first.eq(r_idx == 0),
|
||||
r_last.eq(r_idx + 1 == xfer_rcount)]
|
||||
with m.If(r_ready):
|
||||
m.d.sync += [r_idx.eq(r_idx + 1), cur_addr.eq(cur_addr + 1)]
|
||||
m.next = "SR_LOAD"
|
||||
with m.State("SR_WRAP_HI"):
|
||||
bus_write(_A_AR0, xfer_wbase[8:16])
|
||||
m.next = "SR_WRAP_HI_W"
|
||||
with m.State("SR_WRAP_HI_W"):
|
||||
with m.If(bus_done):
|
||||
m.next = "SR_WRAP_LO"
|
||||
with m.State("SR_WRAP_LO"):
|
||||
bus_write(_A_AR1, xfer_wbase[0:8])
|
||||
m.next = "SR_WRAP_LO_W"
|
||||
with m.State("SR_WRAP_LO_W"):
|
||||
with m.If(bus_done):
|
||||
m.d.sync += cur_addr.eq(xfer_wbase)
|
||||
m.next = "SR_LOAD"
|
||||
|
||||
with m.State("FINISH"):
|
||||
m.d.sync += xfer_done.eq(1)
|
||||
m.next = "IDLE"
|
||||
|
||||
# ── Control regs ─────────────────────────────────────────────────────
|
||||
mac_shadow = Array([Signal(8, name=f"mac{i}") for i in range(6)])
|
||||
wait_ctr = Signal(range(self._reset_cycles + 2))
|
||||
tx_wr = Signal(16)
|
||||
rx_rsr = Signal(16)
|
||||
rx_rd = Signal(16)
|
||||
pkt_len = Signal(16)
|
||||
|
||||
def write_reg(name, addr, payload, nxt, direct=False):
    """Emit two FSM states that write `payload` to `addr` via the xfer engine.

    name    -- name of the issuing state; the wait state is `name + "_W"`.
    addr    -- value loaded into xfer_addr.
    payload -- list of byte values, copied into wbuf[0..len-1].
    nxt     -- state entered once xfer_done is seen.
    direct  -- when true, xfer_direct is set so the engine performs the
               single direct (A=00) access instead of an indirect burst.
    """
    wait_state = f"{name}_W"

    # Issue state: program a plain fixed-length write (no streaming, no
    # ring wrap), stage the payload bytes, then raise xfer_start.
    setup = [
        xfer_addr.eq(addr),
        xfer_rw.eq(1),
        xfer_stream.eq(0),
        xfer_sread.eq(0),
        xfer_wrap.eq(0),
        xfer_direct.eq(1 if direct else 0),
        xfer_len.eq(len(payload)),
    ]
    setup.extend(wbuf[idx].eq(byte) for idx, byte in enumerate(payload))
    setup.append(xfer_start.eq(1))

    with m.State(name):
        m.d.sync += setup
        m.next = wait_state

    # Wait state: drop xfer_start and hold until the engine reports done.
    with m.State(wait_state):
        m.d.sync += xfer_start.eq(0)
        with m.If(xfer_done):
            m.next = nxt
|
||||
|
||||
# ── Main control FSM (Phase 1: init only) ────────────────────────────
|
||||
with m.FSM(domain="sync", name="main_fsm"):
|
||||
with m.State("IDLE"):
|
||||
m.d.sync += self.init_done.eq(0)
|
||||
with m.If(self.init_req):
|
||||
for i in range(6):
|
||||
m.d.sync += mac_shadow[i].eq(self.par[i*8:(i+1)*8])
|
||||
m.next = "MR_RST"
|
||||
with m.Elif(~self.w5100_int_n):
|
||||
m.next = "RX_CHECK"
|
||||
with m.Elif(self.tx_valid & self.tx_sof):
|
||||
m.next = "TX_START"
|
||||
|
||||
# MR = 0x80 software reset (direct A=00), then settle.
|
||||
write_reg("MR_RST", _MR, [_MR_RST], "MR_WAIT", direct=True)
|
||||
with m.State("MR_WAIT"):
|
||||
with m.If(wait_ctr == self._reset_cycles):
|
||||
m.d.sync += wait_ctr.eq(0)
|
||||
m.next = "MR_MODE"
|
||||
with m.Else():
|
||||
m.d.sync += wait_ctr.eq(wait_ctr + 1)
|
||||
|
||||
# MR = indirect + auto-increment (direct A=00).
|
||||
write_reg("MR_MODE", _MR, [_MR_IND | _MR_AI], "SHAR", direct=True)
|
||||
|
||||
# SHAR = source MAC (6-byte auto-increment burst).
|
||||
with m.State("SHAR"):
|
||||
m.d.sync += [xfer_addr.eq(_SHAR0), xfer_rw.eq(1),
|
||||
xfer_stream.eq(0), xfer_sread.eq(0),
|
||||
xfer_direct.eq(0), xfer_len.eq(6)]
|
||||
for i in range(6):
|
||||
m.d.sync += wbuf[i].eq(mac_shadow[i])
|
||||
m.d.sync += xfer_start.eq(1)
|
||||
m.next = "SHAR_W"
|
||||
with m.State("SHAR_W"):
|
||||
m.d.sync += xfer_start.eq(0)
|
||||
with m.If(xfer_done):
|
||||
m.next = "MEMSZ"
|
||||
|
||||
# RMSR/TMSR = 0x55 (2 KB per socket — default; socket 0 used).
|
||||
write_reg("MEMSZ", _RMSR, [0x55, 0x55], "S0_MODE") # RMSR then TMSR
|
||||
# Socket 0: MACRAW mode, OPEN, enable interrupt.
|
||||
write_reg("S0_MODE", _S0_MR, [_S0_MR_MACRAW], "S0_OPEN")
|
||||
write_reg("S0_OPEN", _S0_CR, [_CR_OPEN], "S0_IMR")
|
||||
write_reg("S0_IMR", _IMR, [0x01], "INIT_DONE") # enable S0 IRQ
|
||||
|
||||
with m.State("INIT_DONE"):
|
||||
m.d.sync += self.init_done.eq(1)
|
||||
m.next = "IDLE"
|
||||
|
||||
# ── TX MACRAW ────────────────────────────────────────────────────
|
||||
# read S0_TX_WR → stream frame into the TX buffer at that offset
|
||||
# (ring-wrapping at the 2 KB boundary) → advance S0_TX_WR → SEND.
|
||||
with m.State("TX_START"): # read S0_TX_WR (2 bytes)
|
||||
m.d.sync += [xfer_addr.eq(_S0_TX_WR), xfer_rw.eq(0),
|
||||
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
|
||||
xfer_direct.eq(0), xfer_len.eq(2)]
|
||||
m.d.sync += xfer_start.eq(1)
|
||||
m.next = "TX_RDPTR_W"
|
||||
with m.State("TX_RDPTR_W"):
|
||||
m.d.sync += xfer_start.eq(0)
|
||||
with m.If(xfer_done):
|
||||
m.d.sync += tx_wr.eq(Cat(rbuf[1], rbuf[0])) # big-endian
|
||||
m.next = "TX_DATA"
|
||||
|
||||
with m.State("TX_DATA"): # stream frame → TX buffer
|
||||
m.d.sync += [xfer_addr.eq(_TX_BASE + (tx_wr & _S0_TX_MASK)),
|
||||
xfer_rw.eq(1), xfer_stream.eq(1), xfer_sread.eq(0),
|
||||
xfer_direct.eq(0), xfer_wrap.eq(1),
|
||||
xfer_wbase.eq(_TX_BASE),
|
||||
xfer_wend.eq(_TX_BASE + _S0_TX_MASK + 1)]
|
||||
m.d.sync += xfer_start.eq(1)
|
||||
m.next = "TX_DATA_W"
|
||||
with m.State("TX_DATA_W"):
|
||||
m.d.sync += xfer_start.eq(0)
|
||||
with m.If(xfer_done):
|
||||
m.d.sync += [xfer_stream.eq(0), xfer_wrap.eq(0),
|
||||
tx_wr.eq(tx_wr + s_count)] # advanced pointer
|
||||
m.next = "TX_UPDPTR"
|
||||
|
||||
with m.State("TX_UPDPTR"): # write back S0_TX_WR
|
||||
m.d.sync += [xfer_addr.eq(_S0_TX_WR), xfer_rw.eq(1),
|
||||
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
|
||||
xfer_direct.eq(0), xfer_len.eq(2)]
|
||||
m.d.sync += [wbuf[0].eq(tx_wr[8:16]), wbuf[1].eq(tx_wr[0:8])]
|
||||
m.d.sync += xfer_start.eq(1)
|
||||
m.next = "TX_UPDPTR_W"
|
||||
with m.State("TX_UPDPTR_W"):
|
||||
m.d.sync += xfer_start.eq(0)
|
||||
with m.If(xfer_done):
|
||||
m.next = "TX_SEND"
|
||||
|
||||
# S0_CR = SEND
|
||||
write_reg("TX_SEND", _S0_CR, [_CR_SEND], "IDLE")
|
||||
|
||||
# ── RX MACRAW ────────────────────────────────────────────────────
|
||||
# On W5100 INT: read RX_RSR; if non-zero read RX_RD, read the 2-byte
|
||||
# MACRAW length, stream (length−2) frame bytes out (ring-wrapping),
|
||||
# advance RX_RD by the length, issue RECV, clear the RECV interrupt.
|
||||
with m.State("RX_CHECK"): # read S0_RX_RSR (2 bytes)
|
||||
m.d.sync += [xfer_addr.eq(_S0_RX_RSR), xfer_rw.eq(0),
|
||||
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
|
||||
xfer_direct.eq(0), xfer_len.eq(2)]
|
||||
m.d.sync += xfer_start.eq(1)
|
||||
m.next = "RX_RSR_W"
|
||||
with m.State("RX_RSR_W"):
|
||||
m.d.sync += xfer_start.eq(0)
|
||||
with m.If(xfer_done):
|
||||
m.d.sync += rx_rsr.eq(Cat(rbuf[1], rbuf[0]))
|
||||
m.next = "RX_RSR_CHK"
|
||||
with m.State("RX_RSR_CHK"):
|
||||
with m.If(rx_rsr == 0):
|
||||
m.next = "IDLE" # nothing received
|
||||
with m.Else():
|
||||
m.next = "RX_RDPTR"
|
||||
|
||||
with m.State("RX_RDPTR"): # read S0_RX_RD (2 bytes)
|
||||
m.d.sync += [xfer_addr.eq(_S0_RX_RD), xfer_rw.eq(0),
|
||||
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
|
||||
xfer_direct.eq(0), xfer_len.eq(2)]
|
||||
m.d.sync += xfer_start.eq(1)
|
||||
m.next = "RX_RDPTR_W"
|
||||
with m.State("RX_RDPTR_W"):
|
||||
m.d.sync += xfer_start.eq(0)
|
||||
with m.If(xfer_done):
|
||||
m.d.sync += rx_rd.eq(Cat(rbuf[1], rbuf[0]))
|
||||
m.next = "RX_LEN"
|
||||
|
||||
with m.State("RX_LEN"): # read 2-byte MACRAW length (wrap)
|
||||
m.d.sync += [xfer_addr.eq(_RX_BASE + (rx_rd & _S0_RX_MASK)),
|
||||
xfer_rw.eq(0), xfer_stream.eq(0), xfer_sread.eq(0),
|
||||
xfer_direct.eq(0), xfer_len.eq(2), xfer_wrap.eq(1),
|
||||
xfer_wbase.eq(_RX_BASE),
|
||||
xfer_wend.eq(_RX_BASE + _S0_RX_MASK + 1)]
|
||||
m.d.sync += xfer_start.eq(1)
|
||||
m.next = "RX_LEN_W"
|
||||
with m.State("RX_LEN_W"):
|
||||
m.d.sync += xfer_start.eq(0)
|
||||
with m.If(xfer_done):
|
||||
m.d.sync += pkt_len.eq(Cat(rbuf[1], rbuf[0]))
|
||||
m.next = "RX_FRAME"
|
||||
|
||||
with m.State("RX_FRAME"): # stream (pkt_len−2) frame bytes
|
||||
m.d.sync += [xfer_addr.eq(_RX_BASE + ((rx_rd + 2) & _S0_RX_MASK)),
|
||||
xfer_rw.eq(0), xfer_stream.eq(0), xfer_sread.eq(1),
|
||||
xfer_direct.eq(0), xfer_rcount.eq(pkt_len - 2),
|
||||
xfer_wrap.eq(1), xfer_wbase.eq(_RX_BASE),
|
||||
xfer_wend.eq(_RX_BASE + _S0_RX_MASK + 1)]
|
||||
m.d.sync += xfer_start.eq(1)
|
||||
m.next = "RX_FRAME_W"
|
||||
with m.State("RX_FRAME_W"):
|
||||
m.d.sync += xfer_start.eq(0)
|
||||
with m.If(xfer_done):
|
||||
m.d.sync += [xfer_sread.eq(0), xfer_wrap.eq(0)]
|
||||
m.next = "RX_UPDRD"
|
||||
|
||||
with m.State("RX_UPDRD"): # S0_RX_RD += pkt_len, write back
|
||||
m.d.sync += [xfer_addr.eq(_S0_RX_RD), xfer_rw.eq(1),
|
||||
xfer_stream.eq(0), xfer_sread.eq(0), xfer_wrap.eq(0),
|
||||
xfer_direct.eq(0), xfer_len.eq(2)]
|
||||
m.d.sync += [wbuf[0].eq((rx_rd + pkt_len)[8:16]),
|
||||
wbuf[1].eq((rx_rd + pkt_len)[0:8])]
|
||||
m.d.sync += xfer_start.eq(1)
|
||||
m.next = "RX_UPDRD_W"
|
||||
with m.State("RX_UPDRD_W"):
|
||||
m.d.sync += xfer_start.eq(0)
|
||||
with m.If(xfer_done):
|
||||
m.next = "RX_RECV"
|
||||
|
||||
# S0_CR = RECV, then clear the RECV interrupt bit (S0_IR[2]).
|
||||
write_reg("RX_RECV", _S0_CR, [_CR_RECV], "RX_CLR_IR")
|
||||
write_reg("RX_CLR_IR", _S0_IR, [0x04], "IDLE")
|
||||
|
||||
return m
|
||||
|
||||
|
||||
# ── Testbench ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Self-checking simulation of W5100ParallelMaster against a small
    # behavioural model of the W5100 indirect bus. Five scenarios are run in
    # sequence (init, TX, TX with ring wrap, RX, RX with ring wrap); every
    # failed check is appended to `errors` and the process exits non-zero if
    # any were recorded.
    import sys
    from amaranth.sim import Simulator, Period

    dut = W5100ParallelMaster(strobe_cycles=3, reset_cycles=10)
    errors = []

    MAC = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66]
    # Byte i of the MAC occupies bits [8*i, 8*i+8) of `par`.
    PAR = sum(b << (8 * i) for i, b in enumerate(MAC))

    # Expected indirect-address writes captured by the model (addr, value).
    # MR is written directly (A=00) → captured as ('MR', value).
    EXPECTED = [
        ("MR", _MR_RST),
        ("MR", _MR_IND | _MR_AI),
        (_SHAR0 + 0, MAC[0]), (_SHAR0 + 1, MAC[1]), (_SHAR0 + 2, MAC[2]),
        (_SHAR0 + 3, MAC[3]), (_SHAR0 + 4, MAC[4]), (_SHAR0 + 5, MAC[5]),
        (_RMSR + 0, 0x55), (_RMSR + 1, 0x55),
        (_S0_MR, _S0_MR_MACRAW),
        (_S0_CR, _CR_OPEN),
        (_IMR, 0x01),
    ]

    writes = []      # captured (addr-or-'MR', value) — IDM_DR + MR writes
    model_mem = {}   # W5100 memory image (registers + TX/RX buffers)

    def hx(value):
        """Format a byte as '0xNN'; tolerate None (location never written)
        so that a failing run is reported instead of crashing the report."""
        return "None" if value is None else "0x%02X" % value

    async def w5100_model(ctx):
        """W5100 indirect-bus slave model: tracks MR/IDM_AR, records IDM_DR and
        MR writes, and drives bus_data_i for reads. Mode-0 timing: a write is
        latched on /WR rising while /CS low; reads driven while /RD low."""
        idm_ar = 0
        mr = 0
        prev_rd = prev_wr = 1
        async for vals in ctx.tick("sync").sample(
                dut.cs_n, dut.rd_n, dut.wr_n,
                dut.bus_addr, dut.bus_data_o, dut.bus_data_oe):
            # The sampled signal values are the last six entries of the tuple;
            # the output-enable sample is not needed by the model.
            cs, rd, wr, a, do, _ = vals[-6:]
            ai = (mr >> 1) & 1                     # MR.AI

            # Drive read data while /RD is asserted.
            if cs == 0 and rd == 0:
                if a == _A_MR:
                    val = mr
                elif a == _A_AR0:
                    val = (idm_ar >> 8) & 0xFF
                elif a == _A_AR1:
                    val = idm_ar & 0xFF
                else:
                    val = model_mem.get(idm_ar, 0)
                ctx.set(dut.bus_data_i, val)

            # Latch write on /WR rising edge.
            if cs == 0 and prev_wr == 0 and wr == 1:
                if a == _A_MR:
                    mr = do
                    writes.append(("MR", do))
                elif a == _A_AR0:
                    idm_ar = (idm_ar & 0x00FF) | (do << 8)
                elif a == _A_AR1:
                    idm_ar = (idm_ar & 0xFF00) | do
                else:                               # IDM_DR
                    model_mem[idm_ar] = do
                    writes.append((idm_ar, do))
                    # RECV command consumes the RX data: clear RSR (mirrors HW).
                    if idm_ar == _S0_CR and do == _CR_RECV:
                        model_mem[_S0_RX_RSR] = 0
                        model_mem[_S0_RX_RSR + 1] = 0
                    # Auto-increment applies to data-register accesses only.
                    if ai:
                        idm_ar = (idm_ar + 1) & 0xFFFF
            # Auto-increment after a data read (/RD rising, A=DR).
            if cs == 0 and prev_rd == 0 and rd == 1 and a == _A_DR and ai:
                idm_ar = (idm_ar + 1) & 0xFFFF

            prev_rd, prev_wr = rd, wr

    async def testbench(ctx):
        """Drive the DUT through T1..T5 and record every failed check."""
        ctx.set(dut.par, PAR)
        await ctx.tick("sync").repeat(2)

        # T1: trigger init, wait for init_done.
        ctx.set(dut.init_req, 1)
        await ctx.tick("sync").repeat(1)
        ctx.set(dut.init_req, 0)

        done = False
        for _ in range(4000):
            await ctx.tick("sync").repeat(1)
            if ctx.get(dut.init_done):
                done = True
                break
        if not done:
            errors.append("init_done never asserted")

        print(f"T1 init captured {len(writes)} writes")
        if writes != EXPECTED:
            errors.append("init write sequence mismatch")
            # Side-by-side dump of captured vs expected writes.
            for i in range(max(len(writes), len(EXPECTED))):
                g = writes[i] if i < len(writes) else None
                e = EXPECTED[i] if i < len(EXPECTED) else None
                mark = "" if g == e else "   <-- MISMATCH"
                gs = f"({g[0]:#06x},{g[1]:#04x})" if g and isinstance(g[0], int) else str(g)
                es = f"({e[0]:#06x},{e[1]:#04x})" if e and isinstance(e[0], int) else str(e)
                print(f"  [{i:2}] got {gs:20} exp {es:20}{mark}")
        else:
            print("T1 init sequence matches expected (MR, SHAR, mem sizes, "
                  "S0 MACRAW/OPEN, IMR)")

        # ── helper: stream one TX frame through the external tx interface ─────
        async def feed_frame(ctx, frame):
            """Present `frame` byte by byte on tx_*, holding each byte until
            tx_ready is seen, then wait (bounded) for the SEND command."""
            for i, b in enumerate(frame):
                ctx.set(dut.tx_data, b)
                ctx.set(dut.tx_valid, 1)
                ctx.set(dut.tx_sof, 1 if i == 0 else 0)
                ctx.set(dut.tx_eof, 1 if i == len(frame) - 1 else 0)
                got = False
                for _ in range(400):
                    await ctx.tick("sync").repeat(1)
                    if ctx.get(dut.tx_ready):
                        got = True
                        break
                if not got:
                    errors.append(f"feed_frame: byte {i} never consumed")
                    # Release the stream so a stuck byte is not left asserted
                    # while the remaining scenarios run.
                    ctx.set(dut.tx_valid, 0)
                    ctx.set(dut.tx_sof, 0)
                    ctx.set(dut.tx_eof, 0)
                    return
            ctx.set(dut.tx_valid, 0)
            ctx.set(dut.tx_sof, 0)
            ctx.set(dut.tx_eof, 0)
            # let TX_UPDPTR + SEND complete
            for _ in range(200):
                await ctx.tick("sync").repeat(1)
                if model_mem.get(_S0_CR) == _CR_SEND:
                    break

        # ── T2: TX MACRAW frame (TX_WR=0, no wrap) ───────────────────────────
        FRAME = [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x10, 0x20]
        await feed_frame(ctx, FRAME)

        buf = [model_mem.get(_TX_BASE + i, None) for i in range(len(FRAME))]
        if buf != FRAME:
            errors.append(f"T2 TX buffer mismatch: {buf} != {FRAME}")
        tx_wr_hi = model_mem.get(_S0_TX_WR, 0)
        tx_wr_lo = model_mem.get(_S0_TX_WR + 1, 0)
        adv = (tx_wr_hi << 8) | tx_wr_lo
        if adv != len(FRAME):
            errors.append(f"T2 S0_TX_WR advance: got {adv}, want {len(FRAME)}")
        if model_mem.get(_S0_CR) != _CR_SEND:
            errors.append("T2 SEND command not issued")
        # hx() keeps this report alive when a buffer byte was never written.
        print(f"T2 TX: buffer={[hx(b) for b in buf]}  "
              f"TX_WR={adv}  SEND={model_mem.get(_S0_CR)==_CR_SEND}")

        # ── T3: TX MACRAW with ring wraparound (TX_WR near 2 KB boundary) ─────
        # Pre-load S0_TX_WR = 0x07FE so a 6-byte frame straddles the boundary:
        # offsets 0x7FE,0x7FF then wraps to 0x000,0x001,0x002,0x003.
        model_mem[_S0_TX_WR] = 0x07
        model_mem[_S0_TX_WR + 1] = 0xFE
        model_mem[_S0_CR] = 0x00   # clear so we can detect the new SEND
        WFRAME = [0x41, 0x42, 0x43, 0x44, 0x45, 0x46]
        await feed_frame(ctx, WFRAME)

        # expected physical layout
        exp = {
            _TX_BASE + 0x7FE: WFRAME[0],
            _TX_BASE + 0x7FF: WFRAME[1],
            _TX_BASE + 0x000: WFRAME[2],
            _TX_BASE + 0x001: WFRAME[3],
            _TX_BASE + 0x002: WFRAME[4],
            _TX_BASE + 0x003: WFRAME[5],
        }
        for addr, want in exp.items():
            got = model_mem.get(addr)
            if got != want:
                errors.append(f"T3 wrap byte @0x{addr:04X}: got {got}, want 0x{want:02X}")
        adv2 = (model_mem.get(_S0_TX_WR, 0) << 8) | model_mem.get(_S0_TX_WR + 1, 0)
        want_wr = (0x07FE + len(WFRAME)) & 0xFFFF
        if adv2 != want_wr:
            errors.append(f"T3 wrap S0_TX_WR: got 0x{adv2:04X}, want 0x{want_wr:04X}")
        # Same SEND check as T2: S0_CR was cleared above, so it must be SEND now.
        if model_mem.get(_S0_CR) != _CR_SEND:
            errors.append("T3 SEND command not issued")
        ok = all(model_mem.get(a) == v for a, v in exp.items())
        print(f"T3 TX wrap: bytes_placed_ok={ok}  TX_WR=0x{adv2:04X} (want 0x{want_wr:04X})")

        # ── helper: drive an RX event and collect the streamed-out frame ─────
        def load_rx(rx_rd_off, frame):
            """Place a MACRAW packet [len_hi,len_lo,frame...] in the RX buffer at
            offset rx_rd_off (ring), set RX_RSR/RX_RD, return the 16-bit length."""
            plen = len(frame) + 2
            payload = [(plen >> 8) & 0xFF, plen & 0xFF] + list(frame)
            for i, b in enumerate(payload):
                off = (rx_rd_off + i) & _S0_RX_MASK
                model_mem[_RX_BASE + off] = b
            model_mem[_S0_RX_RSR] = (plen >> 8) & 0xFF
            model_mem[_S0_RX_RSR + 1] = plen & 0xFF
            model_mem[_S0_RX_RD] = (rx_rd_off >> 8) & 0xFF
            model_mem[_S0_RX_RD + 1] = rx_rd_off & 0xFF
            return plen

        async def do_rx(ctx, rx_rd_off, frame):
            """Load a packet, assert the interrupt, collect every byte handed
            over on rx_* until RECV is written, then let the DUT settle.
            Returns (collected_bytes, packet_length)."""
            plen = load_rx(rx_rd_off, frame)
            ctx.set(dut.rx_ready, 1)
            collected = []
            ctx.set(dut.w5100_int_n, 0)          # assert RX interrupt
            for _ in range(1500):
                await ctx.tick("sync").repeat(1)
                if ctx.get(dut.rx_valid) and ctx.get(dut.rx_ready):
                    collected.append(ctx.get(dut.rx_data))
                if model_mem.get(_S0_CR) == _CR_RECV:
                    break
            ctx.set(dut.w5100_int_n, 1)          # deassert; let it finish + idle
            for _ in range(300):
                await ctx.tick("sync").repeat(1)
            ctx.set(dut.rx_ready, 0)
            return collected, plen

        # ── T4: RX MACRAW frame (RX_RD=0, no wrap) ───────────────────────────
        model_mem[_S0_CR] = 0x00
        RX_FRAME = [0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02, 0x03]
        got, plen = await do_rx(ctx, 0x0000, RX_FRAME)
        if got != RX_FRAME:
            errors.append(f"T4 RX frame mismatch: {[hx(b) for b in got]} != "
                          f"{[hx(b) for b in RX_FRAME]}")
        new_rd = (model_mem.get(_S0_RX_RD, 0) << 8) | model_mem.get(_S0_RX_RD + 1, 0)
        if new_rd != plen:
            errors.append(f"T4 RX_RD advance: got 0x{new_rd:04X}, want 0x{plen:04X}")
        # Fail (not just print) when the RECV command never reached S0_CR.
        if model_mem.get(_S0_CR) != _CR_RECV:
            errors.append("T4 RECV command not issued")
        print(f"T4 RX: frame={[hx(b) for b in got]}  RX_RD=0x{new_rd:04X}  "
              f"RECV={model_mem.get(_S0_CR)==_CR_RECV}")

        # ── T5: RX MACRAW with ring wraparound (RX_RD near 2 KB boundary) ─────
        model_mem[_S0_CR] = 0x00
        RX_FRAME2 = [0x51, 0x52, 0x53, 0x54, 0x55]
        # rx_rd = 0x07FD: [len_hi@7FD][len_lo@7FE][f0@7FF][f1@000][f2@001]...
        got2, plen2 = await do_rx(ctx, 0x07FD, RX_FRAME2)
        if got2 != RX_FRAME2:
            errors.append(f"T5 RX wrap frame mismatch: {[hx(b) for b in got2]} != "
                          f"{[hx(b) for b in RX_FRAME2]}")
        new_rd2 = (model_mem.get(_S0_RX_RD, 0) << 8) | model_mem.get(_S0_RX_RD + 1, 0)
        want_rd2 = (0x07FD + plen2) & 0xFFFF
        if new_rd2 != want_rd2:
            errors.append(f"T5 RX wrap RX_RD: got 0x{new_rd2:04X}, want 0x{want_rd2:04X}")
        if model_mem.get(_S0_CR) != _CR_RECV:
            errors.append("T5 RECV command not issued")
        print(f"T5 RX wrap: frame={[hx(b) for b in got2]}  "
              f"RX_RD=0x{new_rd2:04X} (want 0x{want_rd2:04X})")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    sim.add_process(w5100_model)

    sim.run()

    if errors:
        print("\nFAILURES:")
        for e in errors:
            print("  ", e)
        sys.exit(1)
    else:
        print("\nAll tests passed.")
|
||||
@@ -0,0 +1,760 @@
|
||||
"""W5500 SPI master — sync domain (24 MHz).
|
||||
|
||||
SPI Mode 0 (CPOL=0, CPHA=0): CLK idles LOW, data captured on rising edge.
|
||||
SCK = 12 MHz: the sync domain is 24 MHz and the bit engine toggles SCK via a
|
||||
clock-enable (sync ÷ 2).
|
||||
|
||||
W5500 frame format
|
||||
------------------
|
||||
Byte 0–1 Address (16-bit big-endian)
|
||||
Byte 2 Control: [7:3]=BSB [2]=R/W [1:0]=OM
|
||||
Byte 3+ Data
|
||||
|
||||
BSB values used here:
|
||||
0b00000 Common registers
|
||||
0b00001 Socket 0 registers
|
||||
0b00010 Socket 0 TX buffer
|
||||
0b00011 Socket 0 RX buffer
|
||||
|
||||
After NCRA reset the driver issues the W5500 init sequence (MR reset, SHAR,
|
||||
S0_MR MACRAW, S0_CR OPEN, S0_IMR).
|
||||
|
||||
The module provides:
|
||||
- A streaming TX interface (tx_data/tx_valid/tx_ready + sof/eof framing)
|
||||
- A streaming RX interface (rx_data/rx_valid/rx_ready + sof/eof)
|
||||
- init_req / init_done for the NCRA-triggered init sequence
|
||||
- MAC source address shadow input (par[0..5]) for SHAR programming
|
||||
"""
|
||||
|
||||
from amaranth import *
|
||||
|
||||
__all__ = ["W5500SPIMaster"]
|
||||
|
||||
# W5500 register addresses. The 16-bit address is the OFFSET WITHIN A BLOCK;
|
||||
# the block is selected by the BSB field of the control byte (see _CTRL_*),
|
||||
# NOT by the address. So socket-0 registers use small offsets with BSB=1.
|
||||
_W5500_MR = 0x0000 # Mode register (common block)
|
||||
_W5500_SHAR = 0x0009 # Source MAC, 6 bytes (common block)
|
||||
_W5500_S0_MR = 0x0000 # Socket 0 Mode (socket-0 block)
|
||||
_W5500_S0_CR = 0x0001 # Socket 0 Command
|
||||
_W5500_S0_IR = 0x0002 # Socket 0 Interrupt
|
||||
_W5500_S0_RXBUF_SIZE = 0x001E # Socket 0 RX buffer size
|
||||
_W5500_S0_TXBUF_SIZE = 0x001F # Socket 0 TX buffer size
|
||||
_W5500_S0_TX_FSR = 0x0020 # Socket 0 TX Free Size (2 bytes)
|
||||
_W5500_S0_TX_WR = 0x0024 # Socket 0 TX Write Pointer
|
||||
_W5500_S0_RX_RSR = 0x0026 # Socket 0 RX Received Size (2 bytes)
|
||||
_W5500_S0_RX_RD = 0x0028 # Socket 0 RX Read Pointer
|
||||
_W5500_S0_IMR = 0x002C # Socket 0 Interrupt Mask
|
||||
|
||||
# Control byte = (BSB << 3) | (RWB << 2) | OM.
|
||||
# RWB: 1=write 0=read. OM=00 → Variable Data Mode (CS frames the length).
|
||||
# BSB: 0=common, 1=socket0 reg, 2=socket0 TX buffer, 3=socket0 RX buffer.
|
||||
_CTRL_WR_COMMON = (0 << 3) | (1 << 2) # 0x04
|
||||
_CTRL_WR_S0REG = (1 << 3) | (1 << 2) # 0x0C
|
||||
_CTRL_RD_S0REG = (1 << 3) | (0 << 2) # 0x08
|
||||
_CTRL_WR_S0TX = (2 << 3) | (1 << 2) # 0x14
|
||||
_CTRL_RD_S0RX = (3 << 3) | (0 << 2) # 0x18
|
||||
|
||||
|
||||
class W5500SPIMaster(Elaboratable):
|
||||
"""W5500 SPI master in the sync clock domain.
|
||||
|
||||
Physical SPI pins
|
||||
-----------------
|
||||
spi_clk / spi_mosi / spi_miso / spi_cs_n : to W5500
|
||||
w5500_int_n : W5500 INT_N input (active low)
|
||||
w5500_rst_n : W5500 hardware reset (active low)
|
||||
|
||||
Init interface (from BBARegisterFile / BBATop)
|
||||
----------------------------------------------
|
||||
init_req : pulse to trigger the W5500 init sequence
|
||||
init_done : pulse when init sequence completes
|
||||
par : 6-byte MAC address (sampled at init_req)
|
||||
|
||||
TX streaming interface (from TXFrameDrain, sync domain)
|
||||
-------------------------------------------------------
|
||||
tx_data / tx_valid / tx_ready : byte stream
|
||||
tx_sof / tx_eof : frame delimiters on the same cycle as tx_valid
|
||||
|
||||
RX streaming interface (to RXFrameAssembler, sync domain)
|
||||
----------------------------------------------------------
|
||||
rx_data / rx_valid / rx_ready : byte stream
|
||||
rx_sof / rx_eof : frame delimiters
|
||||
"""
|
||||
|
||||
def __init__(self, clk_div=1, reset_cycles=24000):
    """Record the build-time parameters and declare the port signals.

    clk_div      -- SPI clock divider; SCK = sync_clock / (2 * clk_div).
    reset_cycles -- length of the MR-reset settle wait, in sync cycles.
    """
    # ---- build-time parameters ------------------------------------------
    # clk_div=1 is full rate (SCK = sync/2): with the 24 MHz slow domain
    # that gives a 12 MHz SCK (~12 Mbit/s), comfortably above real-world
    # GC BBA TCP throughput. The W5500 tolerates up to 80 MHz SCK, so the
    # divider is purely a safety knob for board-level signal-integrity
    # trouble, not something the design functionally needs.
    self._clk_div = clk_div
    # Default is ~1 ms of settle time; the testbench passes a much smaller
    # value so simulation stays fast.
    self._reset_cycles = reset_cycles

    # ---- physical SPI pins and W5500 sideband ---------------------------
    self.spi_clk     = Signal()
    self.spi_mosi    = Signal()
    self.spi_miso    = Signal()
    self.spi_cs_n    = Signal(init=1)
    self.w5500_int_n = Signal(init=1)
    self.w5500_rst_n = Signal(init=1)

    # ---- init handshake --------------------------------------------------
    self.init_req  = Signal()
    self.init_done = Signal()
    self.par       = Signal(48)   # MAC address (PAR0..5 packed)

    # ---- TX byte stream --------------------------------------------------
    self.tx_data  = Signal(8)
    self.tx_valid = Signal()
    self.tx_ready = Signal()
    self.tx_sof   = Signal()
    self.tx_eof   = Signal()

    # ---- RX byte stream --------------------------------------------------
    self.rx_data  = Signal(8)
    self.rx_valid = Signal()
    self.rx_ready = Signal()
    self.rx_sof   = Signal()
    self.rx_eof   = Signal()
|
||||
|
||||
def elaborate(self, platform):
    """Build the W5500 SPI master logic.

    Three cooperating FSMs are generated, all in the ``sync`` domain:

    * ``byte_fsm``  – shifts one byte out on MOSI / in from MISO (SPI Mode 0).
    * ``xfer_fsm``  – frames one CS-low transaction: 3 header bytes followed
      by a fixed-length payload (wbuf/rbuf), a streamed write payload, or a
      streamed read payload.
    * ``main_fsm``  – sequences transactions for init, MACRAW TX and
      MACRAW RX.

    Parameters
    ----------
    platform
        Unused by this method.

    Returns
    -------
    Module
        The elaborated module.
    """
    m = Module()

    # ── SPI clock enable ─────────────────────────────────────────────
    # clk_en high every `clk_div` sync cycles. The bit engine toggles SCK
    # on each enabled cycle, so SCK = sync / (2 * clk_div).
    clk_en = Signal()
    if self._clk_div <= 1:
        m.d.comb += clk_en.eq(1)    # full rate: SCK = sync/2
    else:
        div_ctr = Signal(range(self._clk_div))
        with m.If(div_ctr == self._clk_div - 1):
            m.d.sync += div_ctr.eq(0)
        with m.Else():
            m.d.sync += div_ctr.eq(div_ctr + 1)
        m.d.comb += clk_en.eq(div_ctr == self._clk_div - 1)

    # ── SPI pin registers (Mode 0: SCK idles LOW) ────────────────────
    sck_r     = Signal()
    cs_r      = Signal(init=1)
    shift_out = Signal(8)
    shift_in  = Signal(8)
    m.d.comb += self.spi_clk .eq(sck_r)
    m.d.comb += self.spi_cs_n.eq(cs_r)
    m.d.comb += self.spi_mosi.eq(shift_out[7])  # MSB first; valid pre-rising

    # ── Byte-transfer engine (Mode 0) ────────────────────────────────
    # On byte_start, shift out byte_tx MSB-first (8 SCK cycles) and capture
    # MISO into byte_rx; pulse byte_done. CS is owned by the xfer engine.
    byte_start = Signal()
    byte_tx    = Signal(8)
    byte_rx    = Signal(8)
    byte_done  = Signal()
    bit_ctr    = Signal(4)

    # Default: byte_done is a one-cycle pulse (overridden in RUN below).
    m.d.sync += byte_done.eq(0)
    with m.FSM(domain="sync", name="byte_fsm"):
        with m.State("IDLE"):
            m.d.sync += sck_r.eq(0)
            with m.If(byte_start):
                m.d.sync += shift_out.eq(byte_tx)
                m.d.sync += bit_ctr.eq(0)
                m.next = "RUN"
        with m.State("RUN"):
            with m.If(clk_en):
                with m.If(~sck_r):
                    # rising edge: slave samples MOSI, master samples MISO
                    m.d.sync += sck_r.eq(1)
                    m.d.sync += shift_in.eq(Cat(self.spi_miso, shift_in[:-1]))
                with m.Else():
                    # falling edge: advance / finish
                    m.d.sync += sck_r.eq(0)
                    with m.If(bit_ctr == 7):
                        m.d.sync += byte_rx.eq(shift_in)
                        m.d.sync += byte_done.eq(1)
                        m.next = "IDLE"
                    with m.Else():
                        m.d.sync += shift_out.eq(Cat(0, shift_out[:-1]))
                        m.d.sync += bit_ctr.eq(bit_ctr + 1)

    # ── Generic register transaction engine (Variable Data Mode) ─────
    # One CS-low frame: 3 header bytes (addr_hi, addr_lo, ctrl) then
    # xfer_len payload bytes. Writes source payload from wbuf; reads
    # capture MISO into rbuf.
    WBUF = 8
    xfer_start = Signal()
    xfer_addr  = Signal(16)
    xfer_ctrl  = Signal(8)
    xfer_len   = Signal(range(WBUF + 1))
    xfer_done  = Signal()
    wbuf = Array([Signal(8, name=f"wbuf{i}") for i in range(WBUF)])
    rbuf = Array([Signal(8, name=f"rbuf{i}") for i in range(WBUF)])
    xfer_idx   = Signal(range(WBUF + 3))

    # Stream-write mode: after the 3-byte header, payload bytes are pulled
    # from (s_data, s_valid, s_last) instead of wbuf, until s_last. Used to
    # forward a frame straight into the W5500 TX buffer. s_consume pulses
    # as each streamed byte is accepted; s_count tracks the byte count.
    xfer_stream = Signal()
    s_data    = Signal(8)
    s_valid   = Signal()
    s_last    = Signal()
    s_consume = Signal()
    s_count   = Signal(16)
    s_last_r  = Signal()    # latched s_last for the in-flight byte

    # Stream-read mode: after the header, read `xfer_rcount` payload bytes
    # (sending 0x00 dummies) and push each out via (r_data, r_valid,
    # r_first, r_last) with r_ready back-pressure. Used to pull a frame
    # out of the W5500 RX buffer into RXFrameAssembler.
    xfer_sread  = Signal()
    xfer_rcount = Signal(16)
    r_data  = Signal(8)
    r_valid = Signal()
    r_first = Signal()
    r_last  = Signal()
    r_ready = Signal()
    r_idx   = Signal(16)

    # Byte to transmit for the current xfer_idx: header bytes 0..2, then
    # wbuf[xfer_idx - 3] for the fixed-length payload.
    x_byte = Signal(8)
    with m.If(xfer_idx == 0):
        m.d.comb += x_byte.eq(xfer_addr[8:16])
    with m.Elif(xfer_idx == 1):
        m.d.comb += x_byte.eq(xfer_addr[0:8])
    with m.Elif(xfer_idx == 2):
        m.d.comb += x_byte.eq(xfer_ctrl)
    with m.Else():
        m.d.comb += x_byte.eq(wbuf[xfer_idx - 3])

    # Combinational defaults; individual xfer_fsm states override them.
    m.d.comb += byte_start.eq(0)
    m.d.comb += byte_tx.eq(0)
    m.d.comb += s_consume.eq(0)
    m.d.comb += r_valid.eq(0)
    m.d.comb += r_data.eq(0)
    m.d.comb += r_first.eq(0)
    m.d.comb += r_last.eq(0)

    # Default: xfer_done is a one-cycle pulse (set in FINISH).
    m.d.sync += xfer_done.eq(0)
    with m.FSM(domain="sync", name="xfer_fsm"):
        with m.State("IDLE"):
            with m.If(xfer_start):
                m.d.sync += cs_r.eq(0)      # assert CS for the frame
                m.d.sync += xfer_idx.eq(0)
                m.d.sync += s_count.eq(0)
                m.d.sync += r_idx.eq(0)
                m.next = "LOAD"
        with m.State("LOAD"):
            m.d.comb += byte_tx.eq(x_byte)
            m.d.comb += byte_start.eq(1)
            m.next = "WAIT"
        with m.State("WAIT"):
            with m.If(byte_done):
                # Payload positions (idx >= 3) capture the received byte.
                with m.If(xfer_idx >= 3):
                    m.d.sync += rbuf[xfer_idx - 3].eq(byte_rx)
                with m.If((xfer_idx == 2) & xfer_stream):
                    m.next = "SLOAD"        # stream the payload (write)
                with m.Elif((xfer_idx == 2) & xfer_sread):
                    m.next = "RLOAD"        # stream the payload (read)
                with m.Elif(~xfer_stream & ~xfer_sread
                            & (xfer_idx == (xfer_len + 2))):
                    m.next = "FINISH"       # 3 header + len − 1
                with m.Else():
                    m.d.sync += xfer_idx.eq(xfer_idx + 1)
                    m.next = "LOAD"

        # ── Streamed-payload sub-loop (TX buffer write) ──────────────
        with m.State("SLOAD"):
            with m.If(s_valid):
                m.d.comb += byte_tx.eq(s_data)
                m.d.comb += byte_start.eq(1)
                m.d.sync += s_last_r.eq(s_last)
                m.next = "SWAIT"
        with m.State("SWAIT"):
            with m.If(byte_done):
                m.d.comb += s_consume.eq(1)     # accept this frame byte
                m.d.sync += s_count.eq(s_count + 1)
                with m.If(s_last_r):
                    m.next = "FINISH"
                with m.Else():
                    m.next = "SLOAD"

        # ── Streamed-payload sub-loop (RX buffer read) ───────────────
        with m.State("RLOAD"):
            with m.If(r_idx == xfer_rcount):
                m.next = "FINISH"
            with m.Else():
                m.d.comb += byte_tx.eq(0)       # dummy MOSI during read
                m.d.comb += byte_start.eq(1)
                m.next = "RWAIT"
        with m.State("RWAIT"):
            with m.If(byte_done):
                m.next = "RPUSH"
        with m.State("RPUSH"):
            # Hold the byte on the output until the consumer takes it.
            m.d.comb += r_data .eq(byte_rx)
            m.d.comb += r_valid.eq(1)
            m.d.comb += r_first.eq(r_idx == 0)
            m.d.comb += r_last .eq(r_idx == (xfer_rcount - 1))
            with m.If(r_ready):
                m.d.sync += r_idx.eq(r_idx + 1)
                m.next = "RLOAD"

        with m.State("FINISH"):
            m.d.sync += cs_r.eq(1)              # deassert CS
            m.d.sync += xfer_done.eq(1)
            m.next = "IDLE"

    # Saved MAC for SHAR programming; current W5500 TX write pointer.
    mac_shadow = Array([Signal(8, name=f"mac{i}") for i in range(6)])
    wait_ctr   = Signal(range(self._reset_cycles + 2))
    tx_wr      = Signal(16)
    rx_rsr     = Signal(16)     # RX received size
    rx_rd      = Signal(16)     # RX read pointer
    pkt_len    = Signal(16)     # MACRAW packet length (incl. 2-byte header)

    # Frame stream from TXFrameDrain feeds the xfer engine's stream port.
    # tx_ready pulses (= s_consume) as each frame byte is taken into the
    # TX-buffer write transaction.
    m.d.comb += [
        s_data .eq(self.tx_data),
        s_valid.eq(self.tx_valid),
        s_last .eq(self.tx_eof),
        self.tx_ready.eq(s_consume),
    ]
    # RX buffer read stream → RXFrameAssembler.
    m.d.comb += [
        self.rx_data .eq(r_data),
        self.rx_valid.eq(r_valid),
        self.rx_sof  .eq(r_first),
        self.rx_eof  .eq(r_last),
        r_ready      .eq(self.rx_ready),
    ]

    # Helper: a setup state that programs one register-write transaction
    # then waits for it to complete and jumps to `nxt`. Must be called
    # inside the main FSM context; it emits states `name` and `name + "_W"`.
    def write_reg(name, addr, ctrl, payload, nxt):
        with m.State(name):
            m.d.sync += xfer_addr.eq(addr)
            m.d.sync += xfer_ctrl.eq(ctrl)
            m.d.sync += xfer_len.eq(len(payload))
            m.d.sync += xfer_stream.eq(0)
            m.d.sync += xfer_sread.eq(0)
            for i, b in enumerate(payload):
                m.d.sync += wbuf[i].eq(b)
            m.d.sync += xfer_start.eq(1)
            m.next = name + "_W"
        with m.State(name + "_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.next = nxt

    # ── Main control FSM ─────────────────────────────────────────────
    with m.FSM(domain="sync", name="main_fsm"):

        # Priority in IDLE: init request, then pending RX interrupt, then
        # start of a TX frame.
        # NOTE(review): w5500_int_n is used here without a synchronizer; if
        # the pin is asynchronous to `sync`, confirm it is synchronized
        # upstream.
        with m.State("IDLE"):
            m.d.sync += self.init_done.eq(0)
            with m.If(self.init_req):
                for i in range(6):
                    m.d.sync += mac_shadow[i].eq(self.par[i*8:(i+1)*8])
                m.next = "MR_RST"
            with m.Elif(~self.w5500_int_n):
                m.next = "RX_CHECK"
            with m.Elif(self.tx_valid & self.tx_sof):
                m.next = "TX_START"

        # Step 1: MR = 0x80 (software reset), then settle ~1 ms.
        write_reg("MR_RST", _W5500_MR, _CTRL_WR_COMMON, [0x80], "MR_WAIT")
        with m.State("MR_WAIT"):
            with m.If(wait_ctr == self._reset_cycles):
                m.d.sync += wait_ctr.eq(0)
                m.next = "SHAR"
            with m.Else():
                m.d.sync += wait_ctr.eq(wait_ctr + 1)

        # Step 2: SHAR = source MAC (6 bytes from PAR0–5).
        with m.State("SHAR"):
            m.d.sync += xfer_addr.eq(_W5500_SHAR)
            m.d.sync += xfer_ctrl.eq(_CTRL_WR_COMMON)
            m.d.sync += xfer_len.eq(6)
            for i in range(6):
                m.d.sync += wbuf[i].eq(mac_shadow[i])
            m.d.sync += xfer_start.eq(1)
            m.next = "SHAR_W"
        with m.State("SHAR_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.next = "S0_MR"

        # Step 3–5: S0_MR=MACRAW (0x04), S0_CR=OPEN (0x01), S0_IMR=0x05.
        # NOTE(review): per the W5500 Sn_IR/Sn_IMR bit layout (bit 4
        # SEND_OK, bit 2 RECV, bit 0 CON), 0x05 enables RECV|CON, not
        # RECV|SEND_OK. The value is kept as-is: with SEND_OK enabled, INT
        # would assert after each TX and nothing in this FSM clears that
        # bit. Confirm the intended mask.
        # NOTE(review): this sequence never writes SIMR; confirm against the
        # datasheet whether socket-0 interrupts reach INTn without it.
        write_reg("S0_MR",  _W5500_S0_MR,  _CTRL_WR_S0REG, [0x04], "S0_CR")
        write_reg("S0_CR",  _W5500_S0_CR,  _CTRL_WR_S0REG, [0x01], "S0_IMR")
        write_reg("S0_IMR", _W5500_S0_IMR, _CTRL_WR_S0REG, [0x05], "INIT_DONE")

        with m.State("INIT_DONE"):
            m.d.sync += self.init_done.eq(1)
            m.next = "IDLE"

        # ── TX path (MACRAW) ─────────────────────────────────────────
        # 1) read S0_TX_WR, 2) stream the frame into the TX buffer at that
        # offset, 3) advance S0_TX_WR by the byte count, 4) issue SEND.
        with m.State("TX_START"):
            m.d.sync += xfer_addr.eq(_W5500_S0_TX_WR)
            m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0REG)
            m.d.sync += xfer_len.eq(2)
            m.d.sync += xfer_stream.eq(0)
            # Clear stream-read mode as well, matching RX_CHECK/write_reg,
            # so this fixed-length read never depends on earlier state.
            m.d.sync += xfer_sread.eq(0)
            m.d.sync += wbuf[0].eq(0)       # read → send 0x00 dummies
            m.d.sync += wbuf[1].eq(0)
            m.d.sync += xfer_start.eq(1)
            m.next = "TX_RDPTR_W"
        with m.State("TX_RDPTR_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.d.sync += tx_wr.eq(Cat(rbuf[1], rbuf[0]))    # big-endian
                m.next = "TX_DATA"

        with m.State("TX_DATA"):
            m.d.sync += xfer_addr.eq(tx_wr)
            m.d.sync += xfer_ctrl.eq(_CTRL_WR_S0TX)     # socket-0 TX buffer
            m.d.sync += xfer_stream.eq(1)
            m.d.sync += xfer_start.eq(1)
            m.next = "TX_DATA_W"
        with m.State("TX_DATA_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.d.sync += xfer_stream.eq(0)
                m.d.sync += tx_wr.eq(tx_wr + s_count)   # advanced pointer
                m.next = "TX_UPDPTR"

        with m.State("TX_UPDPTR"):
            m.d.sync += xfer_addr.eq(_W5500_S0_TX_WR)
            m.d.sync += xfer_ctrl.eq(_CTRL_WR_S0REG)
            m.d.sync += xfer_len.eq(2)
            m.d.sync += xfer_stream.eq(0)
            m.d.sync += wbuf[0].eq(tx_wr[8:16])     # hi (already advanced)
            m.d.sync += wbuf[1].eq(tx_wr[0:8])      # lo
            m.d.sync += xfer_start.eq(1)
            m.next = "TX_UPDPTR_W"
        with m.State("TX_UPDPTR_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.next = "TX_SEND"

        # S0_CR = SEND (0x20)
        write_reg("TX_SEND", _W5500_S0_CR, _CTRL_WR_S0REG, [0x20], "IDLE")

        # ── RX path (MACRAW) ─────────────────────────────────────────
        # Triggered by W5500 INT (w5500_int_n low): read RX_RSR, read
        # RX_RD, read the 2-byte MACRAW length, stream the frame out,
        # advance RX_RD, issue RECV.
        with m.State("RX_CHECK"):                   # read S0_RX_RSR
            m.d.sync += xfer_addr.eq(_W5500_S0_RX_RSR)
            m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0REG)
            m.d.sync += xfer_len.eq(2)
            m.d.sync += xfer_stream.eq(0)
            m.d.sync += xfer_sread.eq(0)
            m.d.sync += wbuf[0].eq(0)
            m.d.sync += wbuf[1].eq(0)
            m.d.sync += xfer_start.eq(1)
            m.next = "RX_RSR_W"
        with m.State("RX_RSR_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.d.sync += rx_rsr.eq(Cat(rbuf[1], rbuf[0]))
                m.next = "RX_RSR_CHK"
        with m.State("RX_RSR_CHK"):
            with m.If(rx_rsr == 0):
                m.next = "IDLE"                     # nothing received
            with m.Else():
                m.next = "RX_RDPTR"

        # wbuf[0..1] are still 0 from RX_CHECK, so the reads below keep
        # sending 0x00 dummies.
        with m.State("RX_RDPTR"):                   # read S0_RX_RD
            m.d.sync += xfer_addr.eq(_W5500_S0_RX_RD)
            m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0REG)
            m.d.sync += xfer_len.eq(2)
            m.d.sync += xfer_start.eq(1)
            m.next = "RX_RDPTR_W"
        with m.State("RX_RDPTR_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.d.sync += rx_rd.eq(Cat(rbuf[1], rbuf[0]))
                m.next = "RX_LEN"

        with m.State("RX_LEN"):                     # read 2-byte MACRAW length
            m.d.sync += xfer_addr.eq(rx_rd)
            m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0RX)
            m.d.sync += xfer_len.eq(2)
            m.d.sync += xfer_start.eq(1)
            m.next = "RX_LEN_W"
        with m.State("RX_LEN_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.d.sync += pkt_len.eq(Cat(rbuf[1], rbuf[0]))
                m.next = "RX_FRAME"

        with m.State("RX_FRAME"):                   # stream pkt_len−2 frame bytes
            m.d.sync += xfer_addr.eq(rx_rd + 2)
            m.d.sync += xfer_ctrl.eq(_CTRL_RD_S0RX)
            m.d.sync += xfer_sread.eq(1)
            # Guard the subtraction: a length below the 2-byte header would
            # wrap the 16-bit count to ~65 k and stream garbage downstream.
            # Treat such a length as an empty payload instead.
            with m.If(pkt_len >= 2):
                m.d.sync += xfer_rcount.eq(pkt_len - 2)
            with m.Else():
                m.d.sync += xfer_rcount.eq(0)
            m.d.sync += xfer_start.eq(1)
            m.next = "RX_FRAME_W"
        with m.State("RX_FRAME_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.d.sync += xfer_sread.eq(0)
                m.next = "RX_UPDRD"

        with m.State("RX_UPDRD"):                   # S0_RX_RD += pkt_len
            m.d.sync += xfer_addr.eq(_W5500_S0_RX_RD)
            m.d.sync += xfer_ctrl.eq(_CTRL_WR_S0REG)
            m.d.sync += xfer_len.eq(2)
            m.d.sync += xfer_stream.eq(0)
            m.d.sync += xfer_sread.eq(0)
            m.d.sync += wbuf[0].eq((rx_rd + pkt_len)[8:16])
            m.d.sync += wbuf[1].eq((rx_rd + pkt_len)[0:8])
            m.d.sync += xfer_start.eq(1)
            m.next = "RX_UPDRD_W"
        with m.State("RX_UPDRD_W"):
            m.d.sync += xfer_start.eq(0)
            with m.If(xfer_done):
                m.next = "RX_RECV"

        # S0_CR = RECV (0x40), then clear the RECV interrupt so INT_N
        # deasserts (write 1 to Sn_IR[2]); otherwise the FSM would re-enter
        # RX_CHECK forever on a real W5500.
        write_reg("RX_RECV",   _W5500_S0_CR, _CTRL_WR_S0REG, [0x40], "RX_CLR_IR")
        write_reg("RX_CLR_IR", _W5500_S0_IR, _CTRL_WR_S0REG, [0x04], "IDLE")

    return m
|
||||
|
||||
|
||||
# ── Testbench ─────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
    # Self-checking simulation: drives the init, TX and RX sequences against
    # a behavioural W5500 SPI slave model and compares the captured SPI
    # transactions with the expected byte sequences. Exits with status 1 on
    # any mismatch.
    import sys
    from amaranth.sim import Simulator, Period

    # Short reset wait so the init sequence runs quickly in simulation.
    dut = W5500SPIMaster(reset_cycles=10)
    errors = []

    # MAC for SHAR: par[i*8:(i+1)*8] = mac byte i → mac = 11 22 33 44 55 66
    MAC = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66]
    PAR = sum(b << (8 * i) for i, b in enumerate(MAC))

    # Expected W5500 init transactions: [addr_hi, addr_lo, ctrl, *payload].
    # ctrl 0x04 = common-block write (VDM); 0x0C = socket-0-reg write (VDM).
    EXPECTED = [
        [0x00, 0x00, 0x04, 0x80],       # MR = 0x80 (reset)
        [0x00, 0x09, 0x04, *MAC],       # SHAR = MAC
        [0x00, 0x00, 0x0C, 0x04],       # S0_MR = MACRAW
        [0x00, 0x01, 0x0C, 0x01],       # S0_CR = OPEN
        [0x00, 0x2C, 0x0C, 0x05],       # S0_IMR = 0x05
    ]

    txns = []   # transactions captured by the W5500 slave model

    # RX frame the W5500 will hand back, and the MACRAW length it reports.
    RX_FRAME = [0xDE, 0xAD, 0xBE, 0xEF, 0x01, 0x02]
    RX_PKT_LEN = len(RX_FRAME) + 2      # MACRAW length includes the header

    def build_response(bsb, addr):
        """Bytes the W5500 drives on MISO for a read of (bsb, addr)."""
        if bsb == 1 and addr == _W5500_S0_RX_RSR:
            return [(RX_PKT_LEN >> 8) & 0xFF, RX_PKT_LEN & 0xFF]
        if bsb == 1 and addr == _W5500_S0_RX_RD:
            return [0x00, 0x00]                                     # RX read pointer = 0
        if bsb == 3 and addr == 0x0000:
            return [(RX_PKT_LEN >> 8) & 0xFF, RX_PKT_LEN & 0xFF]    # length
        if bsb == 3 and addr == 0x0002:
            return list(RX_FRAME)                                   # frame payload
        return [0x00] * 64

    async def w5500_model(ctx):
        """W5500 SPI slave model: captures CS-framed transactions (MOSI) and,
        for reads, drives MISO with canned register/buffer data. Mode 0:
        MOSI sampled on rising SCK, MISO shifted out MSB-first.
        """
        prev_cs, prev_sck = 1, 0
        rx_byte = rx_bits = nbytes = 0
        hdr = [0, 0, 0]
        is_read = False
        resp, ridx = [], 0
        msr = msr_bits = 0
        cur_txn = []
        async for vals in ctx.tick("sync").sample(
                dut.spi_cs_n, dut.spi_clk, dut.spi_mosi):
            # The sampled signal values are the last three tuple entries.
            cs, sck, mosi = vals[-3:]
            rising = (prev_sck == 0 and sck == 1)

            if prev_cs == 1 and cs == 0:        # CS falling: start frame
                cur_txn = []
                rx_byte = rx_bits = nbytes = 0
                is_read = False
                resp, ridx, msr, msr_bits = [], 0, 0, 0

            if cs == 0 and rising:
                # MISO bit just sampled by the master → advance shift register
                if is_read and nbytes >= 3:
                    msr = (msr << 1) & 0xFF
                    msr_bits -= 1
                    if msr_bits == 0:
                        msr = resp[ridx] if ridx < len(resp) else 0
                        ridx += 1
                        msr_bits = 8
                # sample MOSI
                rx_byte = ((rx_byte << 1) | mosi) & 0xFF
                rx_bits += 1
                if rx_bits == 8:
                    cur_txn.append(rx_byte)
                    if nbytes < 3:
                        hdr[nbytes] = rx_byte
                    if nbytes == 2:             # header complete → decode
                        ctrl = hdr[2]
                        is_read = (ctrl & 0x04) == 0
                        bsb = ctrl >> 3
                        addr = (hdr[0] << 8) | hdr[1]
                        if is_read:
                            resp = build_response(bsb, addr)
                            msr, ridx, msr_bits = resp[0], 1, 8
                    nbytes += 1
                    rx_byte = rx_bits = 0

            if prev_cs == 0 and cs == 1:        # CS rising: end frame
                txns.append(list(cur_txn))

            ctx.set(dut.spi_miso, (msr >> 7) & 1)
            prev_cs, prev_sck = cs, sck

    rx_collected = []

    async def rx_collector(ctx):
        """Record every byte transferred on the DUT's RX stream."""
        async for vals in ctx.tick("sync").sample(
                dut.rx_valid, dut.rx_ready, dut.rx_data):
            valid, ready, data = vals[-3:]
            if valid and ready:
                rx_collected.append(data)

    async def testbench(ctx):
        ctx.set(dut.par, PAR)
        await ctx.tick("sync").repeat(4)

        # T1: SPI idle — CLK low (Mode 0), CS high
        if ctx.get(dut.spi_clk) != 0:
            errors.append("T1 CLK idle != 0")
        if ctx.get(dut.spi_cs_n) != 1:
            errors.append("T1 CS idle != 1")
        print(f"T1 idle: CLK={ctx.get(dut.spi_clk)} CS={ctx.get(dut.spi_cs_n)}")

        # T2: run the init sequence
        ctx.set(dut.init_req, 1)
        await ctx.tick("sync").repeat(1)
        ctx.set(dut.init_req, 0)

        # init_done is a one-cycle pulse, so latch the observation here;
        # reading the signal again after the extra ticks below would
        # always report 0.
        init_seen = False
        for _ in range(4000):
            await ctx.tick("sync").repeat(1)
            if ctx.get(dut.init_done):
                init_seen = True
                break
        if not init_seen:
            errors.append("T2 init_done never asserted")
        await ctx.tick("sync").repeat(4)
        print(f"T2 init_done: {int(init_seen)}")

        # T3: verify the captured init transaction sequence
        print(f"T3 captured {len(txns)} init transactions:")
        for t in txns:
            print("   ", [f"0x{b:02X}" for b in t])
        if txns != EXPECTED:
            errors.append(f"T3 init sequence mismatch:\n got {txns}\n want {EXPECTED}")

        # ── T4: TX a frame (MACRAW) ──────────────────────────────────────
        txns.clear()
        FRAME = [0xAA, 0xBB, 0xCC, 0xDD]
        # With MISO=0 the read returns S0_TX_WR = 0x0000.
        TX_EXPECTED = [
            [0x00, 0x24, 0x08, 0x00, 0x00],             # read S0_TX_WR (dummies)
            [0x00, 0x00, 0x14, *FRAME],                 # write TX buffer @ 0x0000
            [0x00, 0x24, 0x0C, 0x00, len(FRAME)],       # S0_TX_WR += len
            [0x00, 0x01, 0x0C, 0x20],                   # S0_CR = SEND
        ]

        async def send_frame(frame):
            """Present `frame` on the TX stream one byte at a time, holding
            each byte until tx_ready is observed (or a timeout expires)."""
            for i, b in enumerate(frame):
                ctx.set(dut.tx_data, b)
                ctx.set(dut.tx_valid, 1)
                ctx.set(dut.tx_sof, 1 if i == 0 else 0)
                ctx.set(dut.tx_eof, 1 if i == len(frame) - 1 else 0)
                for _ in range(2000):
                    if ctx.get(dut.tx_ready):
                        break
                    await ctx.tick("sync").repeat(1)
                else:
                    # Report the stall explicitly instead of silently moving
                    # on to the next byte.
                    errors.append(f"T4 tx_ready never asserted for byte {i}")
                await ctx.tick("sync").repeat(1)    # complete the consume
            ctx.set(dut.tx_valid, 0)
            ctx.set(dut.tx_sof, 0)
            ctx.set(dut.tx_eof, 0)

        await send_frame(FRAME)
        # let the pointer-update + SEND transactions finish
        for _ in range(2000):
            await ctx.tick("sync").repeat(1)
            if len(txns) >= len(TX_EXPECTED):
                break
        await ctx.tick("sync").repeat(4)

        print(f"T4 captured {len(txns)} TX transactions:")
        for t in txns:
            print("   ", [f"0x{b:02X}" for b in t])
        if txns != TX_EXPECTED:
            errors.append(f"T4 TX sequence mismatch:\n got {txns}\n want {TX_EXPECTED}")

        # ── T5: RX a frame (MACRAW) ──────────────────────────────────────
        # The model returns RSR=pkt_len, RD=0, MACRAW length=pkt_len, then the
        # frame. Expected transactions (read dummies are 0x00):
        RX_EXPECTED = [
            [0x00, 0x26, 0x08, 0x00, 0x00],                     # read S0_RX_RSR
            [0x00, 0x28, 0x08, 0x00, 0x00],                     # read S0_RX_RD
            [0x00, 0x00, 0x18, 0x00, 0x00],                     # read MACRAW length
            [0x00, 0x02, 0x18, *([0x00] * len(RX_FRAME))],      # read frame
            [0x00, 0x28, 0x0C, 0x00, RX_PKT_LEN],               # S0_RX_RD += pkt_len
            [0x00, 0x01, 0x0C, 0x40],                           # S0_CR = RECV
            [0x00, 0x02, 0x0C, 0x04],                           # S0_IR clear RECV
        ]
        txns.clear()
        ctx.set(dut.rx_ready, 1)
        ctx.set(dut.w5500_int_n, 0)     # signal a received packet
        for _ in range(4000):
            await ctx.tick("sync").repeat(1)
            if len(txns) >= len(RX_EXPECTED):
                break
        ctx.set(dut.w5500_int_n, 1)
        await ctx.tick("sync").repeat(8)

        print(f"T5 captured {len(txns)} RX transactions:")
        for t in txns:
            print("   ", [f"0x{b:02X}" for b in t])
        print(f"T5 rx frame: {[f'0x{b:02X}' for b in rx_collected]} "
              f"(want {[f'0x{b:02X}' for b in RX_FRAME]})")
        if txns != RX_EXPECTED:
            errors.append(f"T5 RX sequence mismatch:\n got {txns}\n want {RX_EXPECTED}")
        if rx_collected != RX_FRAME:
            errors.append(f"T5 RX frame mismatch: got {rx_collected}, want {RX_FRAME}")

    sim = Simulator(dut)
    sim.add_clock(Period(MHz=24), domain="sync")
    sim.add_testbench(testbench)
    sim.add_process(w5500_model)
    sim.add_process(rx_collector)

    with sim.write_vcd("W5500SPIMaster.vcd"):
        sim.run()

    if errors:
        print("\nFAILURES:")
        for e in errors:
            print("  ", e)
        sys.exit(1)
    else:
        print("\nAll tests passed.")
|
||||
+5
-5
@@ -1,10 +1,10 @@
|
||||
amaranth @ git+https://github.com/amaranth-lang/amaranth@main
|
||||
amaranth-boards @ git+https://github.com/amaranth-lang/amaranth-boards.git@7e24efe2f6e95afddd0c1b56f1a9423c48caa472
|
||||
amaranth-yosys==0.50.0.0.post115
|
||||
importlib_resources==6.5.2
|
||||
amaranth-boards @ git+https://github.com/amaranth-lang/amaranth-boards.git@8bc91db6f68c5c36f30926bf56836739c138986f
|
||||
amaranth-yosys==0.50.0.0.post124
|
||||
importlib_resources==7.1.0
|
||||
Jinja2==3.1.6
|
||||
jschon==0.11.1
|
||||
MarkupSafe==3.0.2
|
||||
MarkupSafe==3.0.3
|
||||
pyvcd==0.4.1
|
||||
rfc3986==2.0.0
|
||||
wasmtime==36.0.0
|
||||
wasmtime==45.0.0
|
||||
|
||||
Reference in New Issue
Block a user