-
{story['narrative']}
-
-
⚠️ Primary Barrier: {story['wall']}
-
🎯 Track Focus: {story['expect']}
+
+
+ ML Systems Engineering
+
+
+ Build the infrastructure that makes the model run reliably
+ at scale, within hardware constraints, in production.
+ Optimizes the 95%.
- """
- )
- return (story_card,)
+ """),
+ mo.md("""
+ A model that achieves 99% accuracy in a Jupyter notebook is **not a product**.
+ It becomes a product only when it can run in real-time on real hardware,
+ serve thousands of concurrent users, recover from failures, detect when it
+ degrades, and update without downtime. That is the engineering this course teaches.
+ """),
+ ])
+ return
+# ─────────────────────────────────────────────────────────────────────────────
+# CHECK 1
+# ─────────────────────────────────────────────────────────────────────────────
+
@app.cell
def _(mo):
- complexity_slider = mo.ui.slider(
- start=1, stop=100, step=1, value=80,
- label="2. ADAPT THE ARCHITECTURE: Set Model Size (%)"
+ check1 = mo.ui.radio(
+ options={
+ "A) The model architecture — choosing transformers over CNNs": "A",
+ "B) The training algorithm — selecting Adam vs SGD": "B",
+ "C) The serving infrastructure — how the model runs reliably in production": "C",
+ "D) The dataset size — gathering more labeled training examples": "D",
+ },
+ label="""**Check your understanding.** A startup ships a model with 94% accuracy.
+Six months later, accuracy has silently dropped to 81% in production — but no code
+has changed. As an ML Systems engineer, which part of the system is your *primary*
+domain for diagnosing and fixing this?""",
)
- return (complexity_slider,)
+ return (check1,)
@app.cell
-def _(budget, complexity_slider, dashboard, persona, story):
- # CALCULATE SUCCESS
- is_safe = complexity_slider.value <= budget
-
- header = dashboard.command_header(
- title=f"Command Center: {persona.name}",
- subtitle=f"Mission: {story['title']} at scale {persona.scale_factor:,.0f} {persona.unit_of_scale}",
- persona_name=persona.role,
- scale=f"{persona.scale_factor:,.0f} {persona.unit_of_scale}",
- constraints={"Physics Compliance": is_safe, "Deployment Ready": is_safe}
+def _(mo, check1):
+ mo.stop(
+ check1.value is None,
+ mo.vstack([
+ check1,
+ mo.callout(
+ mo.md("_Select an answer to continue._"),
+ kind="warn",
+ ),
+ ])
)
- return (is_safe,)
-
-@app.cell
-def _(
- complexity_slider,
- dashboard,
- is_safe,
- mo,
- persona,
- persona_selector,
- story_card,
-):
- # TAB 1: BRIEFING
- briefing_content = mo.vstack([
- mo.md("### 🏁 Phase 1: Identity & Context"),
- persona_selector,
- story_card,
- dashboard.pro_note(
- "Architect Selection",
- "Choose your path. Each role below represents a different scale of the AI Triad."
- )
- ])
-
- # TAB 2: SIMULATION
- simulation_content = mo.vstack([
- mo.md("### 🎛️ Phase 2: The Balancing Act"),
- dashboard.pro_note(
- "The Crisis at Hand",
- f"Your current model complexity is at
{complexity_slider.value}%. " +
- (f"The Physics Compliance badge is red! You are violating the laws of the
{persona.primary_constraint}. Reduce complexity to recover." if not is_safe else "The system is balanced. You are ready for the final audit.")
+ _correct = check1.value == "C"
+ _feedback = {
+ "A": (
+ "**Not quite.** The architecture hasn't changed — the model itself is unchanged. "
+ "The issue is that the *world* changed while the model stayed fixed. "
+ "Model architecture is an ML concern; detecting and responding to drift "
+ "is a *systems* concern — monitoring, pipelines, retraining triggers."
),
- dashboard.layout_cockpit(
- dashboard.lever_panel(mo.vstack([
- mo.md("Adjust the lever below to resize your model."),
- complexity_slider
- ])),
- dashboard.telemetry_panel(mo.vstack([
- mo.md(f"#### instrument: {persona.name}"),
- mo.md(f"
Current Load: {complexity_slider.value}%"),
- mo.md(f"
Constraint: {persona.primary_constraint}"),
- mo.md("---"),
- mo.md(f"
{'✅ SYSTEM NOMINAL' if is_safe else '⚠️ PHYSICAL OVERLOAD'}
"),
- ]), color="#3182ce" if is_safe else "#e53e3e"),
- mo.md("") # Audit trail moved to Tab 3
- )
- ])
+ "B": (
+ "**Not quite.** The training algorithm only runs during training. "
+ "Once the model is deployed, SGD vs Adam no longer matters. "
+ "The degradation happened in production — that's the systems layer: "
+ "monitoring, data pipelines, serving infrastructure."
+ ),
+ "C": (
+ "**Correct.** The model hasn't changed — but the world it's operating in has. "
+ "This is *silent degradation*, one of the defining challenges of ML systems. "
+ "Your job is not to debug code; it's to build monitoring that detects when "
+ "production data drifts away from training data, and pipelines that respond. "
+ "That's the 95%."
+ ),
+ "D": (
+ "**Not quite.** More training data would help if you were retraining — "
+ "but the immediate problem is that you don't even *know* the model is degrading "
+ "until someone complains. The systems problem is the absence of monitoring. "
+ "Data collection is part of the solution, but detecting the problem comes first."
+ ),
+ }
- # TAB 3: DEPLOYMENT
- deployment_content = mo.vstack([
- mo.md("### 📝 Phase 3: Audit & Deployment"),
- dashboard.audit_panel(mo.vstack([
- mo.md("Before we deploy to the fleet, you must provide the engineering rationale for your chosen complexity level."),
- mo.ui.text_area(placeholder="Why is this setup optimal for your mission?", label="Justification"),
- mo.center(mo.ui.button(label="🚀 Deploy to Fleet", kind="success", disabled=not is_safe))
- ]))
- ])
+ _color = "#f0fdf4" if _correct else "#fef2f2"
+ _border = "#16a34a" if _correct else "#ef4444"
+ _icon = "✅" if _correct else "⚠️"
- # ASSEMBLE TABS
- tabs = mo.ui.tabs({
- "1. Briefing": briefing_content,
- "2. Simulation": simulation_content,
- "3. Deployment": deployment_content
- })
+ mo.vstack([
+ check1,
+ mo.Html(f"""
+
+
+ {_icon} {"Correct" if _correct else "Not quite"}
+
+
+ {_feedback[check1.value]}
+
+
+ """),
+ ])
+ return
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# CONCEPT 2: PHYSICAL CONSTRAINTS PARTITION DEPLOYMENT
+# ─────────────────────────────────────────────────────────────────────────────
+
+@app.cell
+def _(mo, check1):
+ mo.stop(check1.value is None)
+
+ mo.vstack([
+ mo.md("---"),
+ mo.md("""
+ ## Why Constraints Drive Architecture
+
+ The same model cannot simply be "resized" to run everywhere.
+ Three physical laws carve the deployment landscape into distinct regimes
+ that no amount of software engineering can bridge:
+ """),
+ mo.Html("""
+
+
+
+
⚡
+
+ The Speed of Light
+
+
+ London to New York = 36 ms minimum round-trip.
+ A self-driving car that needs a 10 ms decision loop
+ cannot route to a remote datacenter.
+ Physics sets this floor. No GPU upgrade helps.
+
+
+
+
+
🌡️
+
+ Thermodynamics
+
+
+ Heat accumulates faster than a small enclosure can dissipate it.
+ A smartphone running a heavy model continuously
+ throttles its processor after 90 seconds.
+ No software fix prevents heat.
+
+
+
+
+
💾
+
+ Memory Physics
+
+
+ Moving data through memory costs energy and takes time.
+ A microcontroller with 256 KB of SRAM
+ cannot page memory from disk.
+ If the model doesn't fit, it doesn't run.
+
+
+
+
+ """),
+ mo.md("""
+ These three constraints — latency floors, power limits, and memory capacity —
+ divide the world into four fundamentally different deployment environments.
+ Engineers who treat deployment as an afterthought collide with these walls
+ after months of architectural work.
+
+ **The insight of ML Systems engineering:** choose your regime *first*,
+ because the physics of that regime constrains every design decision that follows.
+ """),
+ ])
+ return
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# CHECK 2 (multi-select — more than one correct answer)
+# ─────────────────────────────────────────────────────────────────────────────
+
+@app.cell
+def _(mo, check1):
+ mo.stop(check1.value is None)
+
+ check2 = mo.ui.multiselect(
+ options={
+ "Use a smaller model with fewer parameters": "model_size",
+ "Apply INT8 quantization to reduce precision": "quantization",
+ "Move the datacenter server physically closer": "move_server",
+ "Use a faster GPU with higher TFLOPS": "faster_gpu",
+ "Deploy the model directly on the vehicle": "edge_deploy",
+ },
+ label="""**Check your understanding.** An autonomous vehicle perception system
+is routed to a cloud datacenter 2,000 km away. Round-trip latency is 40 ms.
+The safety requirement is a 10 ms end-to-end decision loop.
+
+Select **all approaches** that could actually solve the latency problem:""",
+ )
+ return (check2,)
+
+
+@app.cell
+def _(mo, check1, check2):
+ mo.stop(check1.value is None or len(check2.value) == 0)
+
+ _correct_set = {"move_server", "edge_deploy"}
+ _selected = set(check2.value)
+ _exactly_right = _selected == _correct_set
+ _has_wrong = bool(_selected - _correct_set)
+ _missing_right = bool(_correct_set - _selected)
+
+ _option_labels = {
+ "model_size": "Use a smaller model",
+ "quantization": "Apply INT8 quantization",
+ "move_server": "Move the server physically closer",
+ "faster_gpu": "Use a faster GPU",
+ "edge_deploy": "Deploy on the vehicle",
+ }
+
+ _rows = ""
+ for _key, _label in _option_labels.items():
+ _is_selected = _key in _selected
+ _is_correct = _key in _correct_set
+ if _is_selected and _is_correct:
+ _icon, _bg, _col = "✅", "#f0fdf4", "#15803d"
+ elif _is_selected and not _is_correct:
+ _icon, _bg, _col = "❌", "#fef2f2", "#dc2626"
+ elif not _is_selected and _is_correct:
+ _icon, _bg, _col = "◉", "#fffbeb", "#d97706"
+ else:
+ _icon, _bg, _col = "○", "#f8fafc", "#94a3b8"
+ _rows += f"""
+
+ {_icon}
+
+ {_label}
+
+
"""
+
+ _explanation = """
+
+ The physics: The 40 ms latency comes from the speed of light
+ across 2,000 km of fiber — approximately 200,000 km/s.
+ No software change, no GPU upgrade, no model compression
+ removes this physical floor.
+ Smaller models and faster GPUs reduce
+ compute time, but the round-trip latency is dominated by
+ propagation delay — they don't help.
+ Moving the server physically closer or
+ deploying directly on the vehicle are the only solutions
+ because they reduce the distance the signal must travel.
+ This is why Edge ML exists as a deployment paradigm — not as a preference,
+ but as a physical necessity.
+
+ """
+
+ _title = "✅ Exactly right." if _exactly_right else (
+ "Partially right — review the highlighted options." if not _has_wrong else
+ "Not quite — some selections add compute speed, not reduce propagation delay."
+ )
+ _border = "#16a34a" if _exactly_right else ("#f59e0b" if not _has_wrong else "#ef4444")
+ _bg_outer = "#f0fdf4" if _exactly_right else ("#fffbeb" if not _has_wrong else "#fef2f2")
+
+ mo.vstack([
+ check2,
+ mo.Html(f"""
+
+
{_title}
+ {_rows}
+ {_explanation}
+
+ """),
+ ])
+ return
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# CONCEPT 3: THE DEPLOYMENT REGIMES
+# ─────────────────────────────────────────────────────────────────────────────
+
+@app.cell
+def _(mo, check1, check2):
+ mo.stop(check1.value is None or len(check2.value) == 0)
+
+ mo.vstack([
+ mo.md("---"),
+ mo.md("""
+ ## The Four Physical Regimes
+
+ The physical constraints above don't create a continuum — they create
+ **four distinct operating envelopes**, each demanding different infrastructure,
+ different optimization strategies, and different definitions of "correct."
+ """),
+ mo.Html("""
+
+
+
+
+
☁️
+
+
Cloud ML
+
+ Binding constraint: Memory Bandwidth Wall
+
+
+
+
+ Virtually unlimited compute and storage. The binding constraint
+ is not processing power — it is how fast data can move from
+ memory to compute cores. Most large models are memory-bandwidth-bound,
+ not compute-bound.
+
+
+ Latency: 100–500 ms · Power: kilowatts · Memory: terabytes
+
+
+
+
+
+
🤖
+
+
Edge ML
+
+ Binding constraint: Latency Determinism Wall
+
+
+
+
+ Computation happens near the data source — factory floors,
+ vehicles, hospitals. The binding constraint is not average latency
+ but tail latency: a single spike in a safety-critical system
+ is a failure, not a statistic.
+
+
+ Latency: 10–100 ms · Power: watts–tens of watts · Memory: gigabytes
+
+
+
+
+
+
📱
+
+
Mobile ML
+
+ Binding constraint: Thermal Power Wall
+
+
+
+
+ Intelligence runs directly on consumer devices. Compute capability
+ is substantial, but sustained operation is limited by heat
+ accumulation in a sealed, handheld enclosure. After thermal
+ throttling, performance drops by 30–70%.
+
+
+ Latency: 5–50 ms · Power: 3–5 W sustained · Memory: 4–16 GB
+
+
+
+
+
+
👂
+
+
TinyML
+
+ Binding constraint: SRAM Capacity Wall
+
+
+
+
+ Always-on intelligence in microcontrollers running on
+ coin-cell batteries. There is no operating system, no virtual
+ memory, no paging. If the model does not fit in 256 KB of SRAM,
+ it does not run. Every byte is a resource allocation decision.
+
+
+ Latency: 1–10 ms · Power: microwatts–milliwatts · Memory: kilobytes
+
+
+
+
+ """),
+ mo.callout(
+ mo.md(
+ "**Nine orders of magnitude** separate the largest cloud deployment "
+ "(megawatts, terabytes) from the smallest TinyML device (microwatts, kilobytes). "
+ "The engineering principles that govern one end of this spectrum "
+ "do not transfer to the other. This is why ML Systems is a discipline, "
+ "not a configuration setting."
+ ),
+ kind="info",
+ ),
+ ])
+ return
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# CHECK 3 (ranking via radio — forces active reasoning about trade-offs)
+# ─────────────────────────────────────────────────────────────────────────────
+
+@app.cell
+def _(mo, check1, check2):
+ mo.stop(check1.value is None or len(check2.value) == 0)
+
+ check3 = mo.ui.radio(
+ options={
+ "A) Cloud ML — access to the most compute": "A",
+ "B) Edge ML — low latency and local processing": "B",
+ "C) Mobile ML — runs on the patient's own device": "C",
+ "D) TinyML — lowest power, can run for months on a battery": "D",
+ },
+ label="""**Check your understanding.** A hospital wants to deploy an AI system
+that detects sepsis from ICU sensor readings. Requirements: results within 2 ms of
+each sensor reading, no patient data can leave the hospital network, and the sensor
+node must run for 6 months on a small battery without replacement.
+
+Which deployment paradigm is the *only* one that satisfies all three requirements simultaneously?""",
+ )
+ return (check3,)
+
+
+@app.cell
+def _(mo, check1, check2, check3):
+ mo.stop(check1.value is None or len(check2.value) == 0 or check3.value is None)
+
+ _correct = check3.value == "D"
+ _feedback = {
+ "A": (
+ "**Not quite.** Cloud ML violates two of the three requirements. "
+ "Round-trip latency to a cloud server is 10–500 ms — far above the 2 ms requirement. "
+ "And patient data would leave the hospital network, violating the privacy constraint. "
+ "Cloud gives you power, but power is not the binding constraint here."
+ ),
+ "B": (
+ "**Closer, but not sufficient.** Edge ML achieves low latency and local processing, "
+ "satisfying the first two requirements. But an edge server draws tens of watts "
+ "continuously — it cannot run for 6 months on a small battery. "
+ "The power constraint eliminates it. Edge is right for latency; wrong for energy."
+ ),
+ "C": (
+ "**Not quite.** Mobile ML runs locally (satisfying privacy) and can meet the "
+ "latency target, but sustained operation at smartphone-level power draws "
+ "3–5 W. A small sensor battery would last hours, not months. "
+ "The energy envelope makes mobile ML infeasible for always-on sensing."
+ ),
+ "D": (
+ "**Correct.** TinyML is the only paradigm that satisfies all three simultaneously. "
+ "Inference happens directly on the sensor node — no network latency, no data "
+ "leaving the hospital. Microcontrollers running at microwatts can sustain "
+ "always-on sensing for months on a coin-cell battery. "
+ "The model must fit in kilobytes — that is the engineering challenge this regime imposes. "
+ "Notice: this was not a software preference. It was a constraint analysis."
+ ),
+ }
+
+ _color = "#f0fdf4" if _correct else "#fef2f2"
+ _border = "#16a34a" if _correct else "#ef4444"
+ _icon = "✅" if _correct else "⚠️"
+
+ mo.vstack([
+ check3,
+ mo.Html(f"""
+
+
+ {_icon} {"Correct" if _correct else "Not quite"}
+
+
+ {_feedback[check3.value]}
+
+
+ """),
+ ])
+ return
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# SECTION 4: LAB INTERFACE ORIENTATION
+# Shows students the exact UI components they will encounter from Lab 01 onward.
+# Zero new physics content — pure scaffolding of look-and-feel.
+# ─────────────────────────────────────────────────────────────────────────────
+
+@app.cell
+def _(mo, check1, check2, check3):
+ mo.stop(check1.value is None or len(check2.value) == 0 or check3.value is None)
+
+ mo.vstack([
+ mo.md("---"),
+ mo.md("""
+ ## How Every Lab in This Curriculum Is Structured
+
+ Starting from Lab 01, every lab follows the same **four-zone cockpit layout**.
+ This is not aesthetic — it is a deliberate information architecture that
+ separates *what you control* from *what the system tells you*.
+
+ Before you begin Lab 01, spend two minutes with the interactive tour below.
+ You will recognize every element the moment you see it.
+ """),
+ ])
return
@app.cell
-def _(view):
- view
+def _(mo, check1, check2, check3, COLORS):
+ mo.stop(check1.value is None or len(check2.value) == 0 or check3.value is None)
+
+ # ── ZONE ANATOMY DIAGRAM ─────────────────────────────────────────
+ _zone_html = """
+
+
+
+
+ Zone 1 · Command Header
+
+
+ Lab number, scenario title, your active persona, and live
+ constraint badges (Latency, Power, Memory).
+ Badges turn red the moment you violate a constraint —
+ the system doesn't wait to tell you. Always visible.
+
+
+
+
+
+ Zone 2 · Engineering Levers
+
+
+ Sliders, dropdowns, and toggles that modify your design —
+ hardware target, batch size, precision, model variant.
+ Every change recalculates everything instantly.
+ No "Submit" button. The lab reacts in real-time.
+
+
+
+
+
+ Zone 3 · Live Telemetry
+
+
+ Metric cards, Roofline chart (from Lab 11), Latency Waterfall
+ (from Lab 02). All charts update as you move sliders.
+ Your job is to read these instruments and trace cause to effect.
+
+
+
+
+
+ Zone 4 · Audit Trail
+
+
+ Consequence log, explanatory text, and a free-form rationale box.
+ Explain your design decision in writing before
+ submitting. The act of articulating trade-offs is the learning —
+ not the number the simulator returns.
+
+
+
+
+ """
+
+ # ── LIVE COMPONENT TOUR via mo.ui.tabs ────────────────────────────
+ _tab_overview = mo.vstack([
+ mo.md("""
+ **`mo.ui.tabs`** — labs with multiple acts use tab navigation.
+ Each tab is a self-contained section. You are looking at a live example right now.
+
+ In later labs, tabs structure the workflow:
+ ```
+ Act I: Baseline → establish the initial state
+ Act II: Intervention → apply an optimization
+ ```
+ The tab structure ensures you *commit* to a baseline before modifying it.
+ This is not UX convenience — it enforces the scientific method: measure before you change.
+ """),
+ mo.callout(
+ mo.md("Switch between tabs above to navigate. Your work in each tab is preserved independently."),
+ kind="info"
+ ),
+ ])
+
+ _tab_levers = mo.vstack([
+ mo.md("**Zone 2 levers** update the system state reactively. Here is a live example:"),
+ mo.hstack([
+ mo.vstack([
+ mo.md("**Hardware target**"),
+ mo.ui.dropdown(
+ options=["H100 (Cloud)", "Jetson Orin NX (Edge)", "Smartphone NPU (Mobile)", "Cortex-M7 (TinyML)"],
+ value="H100 (Cloud)",
+ label="Select hardware:"
+ ),
+ mo.md("**Batch size**"),
+ mo.ui.slider(start=1, stop=128, step=1, value=32, label="Batch size:"),
+ ], gap=1),
+ mo.Html(f"""
+
+
Live Telemetry Preview
+
+ Latency: 12.4 ms
+ Throughput: 2,580 tok/s
+ Memory: 34.2 GB
+ MFU: 47%
+
+
+ In real labs these numbers
update as you move sliders.
+
+
+ """),
+ ], gap=2, justify="start"),
+ mo.callout(
+ mo.md("**Key insight:** Every lever connects to every metric. Changing batch size affects memory, which affects throughput, which affects cost. The cockpit shows all effects simultaneously."),
+ kind="warn",
+ ),
+ ])
+
+ _tab_prediction = mo.vstack([
+ mo.md("""
+ **The Prediction Lock** — the most important component in the curriculum.
+
+ Before every Act in Labs 01–14, you will see a **Prediction Lock** like the one below.
+ You must commit to a prediction *before* you can run the simulation.
+ """),
+ mo.Html("""
+
+
+ 🔒 Prediction Lock — Act I
+
+
+ Scenario: You double the batch size from 32 to 64 on an H100.
+ The model is memory-bandwidth-bound.
+ Predict: Will throughput (tokens/second) increase,
+ decrease, or stay approximately the same?
+
+
+
+ A) Increase proportionally (~2×)
+
+
+ B) Increase sub-linearly
+
+
+ C) Stay the same
+
+
+
+ ↑ In a real lab, selecting an answer here unlocks the simulation instruments below.
+
+
+ """),
+ mo.md("""
+ **Why this matters:** Research on deliberate practice shows that making an
+ explicit prediction before observing a result dramatically increases retention.
+ If your prediction is wrong, you experience *productive failure* — the gap
+ between expectation and observation drives deeper encoding than passive reading.
+
+ The prediction lock is not a gatekeeping mechanism. It is a learning amplifier.
+ """),
+ ])
+
+ _tab_mathpeek = mo.vstack([
+ mo.md("""
+ **`MathPeek` accordion** — the invariant behind every instrument.
+
+ Every chart and metric in the telemetry panel connects to a physical equation.
+ The MathPeek accordion surfaces that equation on demand — you are never just
+ moving sliders, you are probing the underlying physics.
+ """),
+ mo.accordion({
+ "📐 View the Invariant — Iron Law of ML Systems (Preview)": mo.md("""
+ **Formula:** `T = D/BW + O/R + L`
+
+ **Components:**
+ - **T** — Total end-to-end latency (seconds)
+ - **D** — Data size (bytes moved across memory hierarchy)
+ - **BW** — Memory bandwidth (bytes/second)
+ - **O** — FLOPs required (floating-point operations)
+ - **R** — Compute rate (FLOPs/second, hardware peak × MFU)
+ - **L** — Fixed overhead latency (dispatch tax, network RTT)
+
+ _This equation is the central object of the entire curriculum.
+ You will encounter it in every lab. Open this accordion whenever
+ you need to re-anchor a number to first principles._
+ """),
+ }),
+ mo.callout(
+ mo.md("**Lab 01** introduces the Iron Law formally. For now, recognize the accordion — it lives in every lab."),
+ kind="info",
+ ),
+ ])
+
+ _tour_tabs = mo.ui.tabs({
+ "🏗️ Cockpit Anatomy": _tab_overview,
+ "🎛️ Live Levers": _tab_levers,
+ "🔒 Prediction Lock": _tab_prediction,
+ "📐 MathPeek": _tab_mathpeek,
+ })
+
+ mo.vstack([
+ _tour_tabs,
+ mo.Html("""
+
+
✅
+
+ Interface orientation complete.
+ You now recognize the four-zone cockpit, the live lever pattern, the
+ prediction lock, and the MathPeek accordion. These are the only UI
+ primitives used across all 14 labs — nothing new will be introduced
+ without explanation.
+
+
+ """),
+ ])
+ return
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# DEPLOYMENT CONTEXT SELECTION
+# ─────────────────────────────────────────────────────────────────────────────
+
+@app.cell
+def _(mo, check1, check2, check3):
+ mo.stop(
+ check1.value is None or len(check2.value) == 0 or check3.value is None,
+ mo.md("_Complete all three checks above to unlock your deployment context selection._")
+ )
+
+ mo.vstack([
+ mo.md("---"),
+ mo.md("""
+ ## Choose Your Physical Regime
+
+ You have now seen why deployment context is a first-order engineering decision,
+ not an afterthought. For the next 15 labs, you will carry one deployment context
+ as your primary lens — the physical regime whose constraints will test every
+ optimization technique you learn.
+
+ **This is not a career choice.** It is a choice of which physical law will
+ be your primary adversary. You will understand all four regimes —
+ but you will develop deep intuition for one.
+ """),
+ ])
+ return
+
+
+@app.cell
+def _(mo, check1, check2, check3):
+ mo.stop(check1.value is None or len(check2.value) == 0 or check3.value is None)
+
+ context_selector = mo.ui.radio(
+ options={
+ "☁️ Cloud ML — your constraint is the Memory Bandwidth Wall": "cloud",
+ "🤖 Edge ML — your constraint is the Latency Determinism Wall": "edge",
+ "📱 Mobile ML — your constraint is the Thermal Power Wall": "mobile",
+ "👂 TinyML — your constraint is the SRAM Capacity Wall": "tiny",
+ },
+ label="Select the deployment regime you will focus on throughout this curriculum:",
+ )
+ return (context_selector,)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# CONTEXT REVEAL + STAKEHOLDER MESSAGE + LEDGER INIT
+# ─────────────────────────────────────────────────────────────────────────────
+
+@app.cell
+def _(mo, check1, check2, check3, context_selector, ledger, COLORS):
+ mo.stop(
+ check1.value is None
+ or len(check2.value) == 0
+ or check3.value is None
+ or context_selector.value is None,
+ mo.vstack([
+ context_selector,
+ mo.md("_Select your deployment context above._"),
+ ])
+ )
+
+ _key = context_selector.value
+ _contexts = {
+ "cloud": {
+ "color": COLORS["BlueLine"],
+ "bg": COLORS["BlueL"],
+ "label": "Cloud ML",
+ "nemesis": "Memory Bandwidth Wall",
+ "role": "LLM Infrastructure Lead",
+ "north_star":"Maximize sustained serving throughput for a 70B-parameter model on a multi-GPU cluster.",
+ "persona": "Your CTO",
+ "quote": (
+ "We're burning $40,000 a day on GPU rentals. "
+ "If hardware utilization doesn't hit 50% by next quarter, "
+ "we run out of runway. The model is fine. The infrastructure is not. Fix it."
+ ),
+ "arc": [
+ ("Labs 01–04", "Foundations",
+ "Learn the D·A·M taxonomy, the Iron Law, and why the Memory Wall is your primary constraint"),
+ ("Labs 05–08", "Build",
+ "Trace memory allocation through a transformer forward pass; profile your serving stack"),
+ ("Labs 09–11", "Optimize",
+ "Apply quantization, understand hardware utilization, and cross the efficiency threshold"),
+ ("Labs 12–14", "Deploy",
+ "Benchmark, monitor, and operate a production serving system at scale"),
+ ],
+ },
+ "edge": {
+ "color": COLORS["RedLine"],
+ "bg": COLORS["RedL"],
+ "label": "Edge ML",
+ "nemesis": "Latency Determinism Wall",
+ "role": "Autonomous Systems Lead",
+ "north_star":"Maintain a deterministic 10 ms perception-to-decision loop on a Jetson Orin NX.",
+ "persona": "Your Safety Director",
+ "quote": (
+ "A 5 ms latency spike added 15 cm of stopping distance at 60 mph. "
+ "That is a regulatory failure. I do not care about your average latency. "
+ "One tail event is one too many. Zero tolerance."
+ ),
+ "arc": [
+ ("Labs 01–04", "Foundations",
+ "Understand latency decomposition, the Iron Law, and why P99 is the only metric that matters"),
+ ("Labs 05–08", "Build",
+ "Implement a priority scheduler; measure the tail-latency distribution of your inference stack"),
+ ("Labs 09–11", "Optimize",
+ "Apply structured pruning to reduce worst-case latency below the safety threshold"),
+ ("Labs 12–14", "Deploy",
+ "Validate deterministic SLAs on physical edge hardware under adversarial load"),
+ ],
+ },
+ "mobile": {
+ "color": COLORS["OrangeLine"],
+ "bg": COLORS["OrangeL"],
+ "label": "Mobile ML",
+ "nemesis": "Thermal Power Wall",
+ "role": "Smartphone App Architect",
+ "north_star":"Run 60 FPS real-time on-device inference within a 2 W sustained thermal envelope.",
+ "persona": "Your UX Director",
+ "quote": (
+ "Users are returning the device because it heats up after two minutes of AR. "
+ "You have 2 Watts of sustained thermal headroom. Not 2.1. Two. "
+ "Every watt you save is a feature."
+ ),
+ "arc": [
+ ("Labs 01–04", "Foundations",
+ "Map the D·A·M trade-off for mobile NPUs; quantify the thermal budget"),
+ ("Labs 05–08", "Build",
+ "Implement MobileNetV2 with depthwise separable convolutions in TinyTorch"),
+ ("Labs 09–11", "Optimize",
+ "Apply INT8 quantization and operator fusion to stay within the thermal envelope"),
+ ("Labs 12–14", "Deploy",
+ "Benchmark sustained throughput on a power-constrained device under realistic workloads"),
+ ],
+ },
+ "tiny": {
+ "color": COLORS["GreenLine"],
+ "bg": COLORS["GreenL"],
+ "label": "TinyML",
+ "nemesis": "SRAM Capacity Wall",
+ "role": "TinyML / Embedded Systems Lead",
+ "north_star":"Fit real-time keyword spotting in under 256 KB SRAM, running under 1 mW.",
+ "persona": "Your Hardware Lead",
+ "quote": (
+ "We have 256 KB of on-chip SRAM. Every weight byte you keep "
+ "is audio buffer you lose. There is no paging. There is no swap. "
+ "If it does not fit, it does not run."
+ ),
+ "arc": [
+ ("Labs 01–04", "Foundations",
+ "Count every byte in a DS-CNN keyword spotting model; understand SRAM allocation"),
+ ("Labs 05–08", "Build",
+ "Implement depthwise separable convolutions in TinyTorch; profile memory layout"),
+ ("Labs 09–11", "Optimize",
+ "Achieve 4× compression via magnitude pruning and INT8 quantization"),
+ ("Labs 12–14", "Deploy",
+ "Fit the full inference pipeline in 256 KB and validate on a physical MCU"),
+ ],
+ },
+ }
+
+ _t = _contexts[_key]
+
+ # Persist to Design Ledger
+ ledger.save(chapter=0, design={
+ "deployment_context": _key,
+ "check1_answer": check1.value,
+ "check1_correct": check1.value == "C",
+ "check2_selections": list(check2.value),
+ "check3_answer": check3.value,
+ "check3_correct": check3.value == "D",
+ })
+
+ _arc_rows = "".join([
+ f"""
+ | {phase} |
+ {label} |
+ {desc} |
+
"""
+ for phase, label, desc in _t["arc"]
+ ])
+
+ mo.vstack([
+ context_selector,
+ mo.md("---"),
+
+ # Stakeholder message
+ mo.Html(f"""
+
+
+ Incoming Message · {_t['persona']}
+
+
+ "{_t['quote']}"
+
+
+ """),
+
+ # Mission card
+ mo.Html(f"""
+
+
+ 🎖️ Deployment Context Confirmed
+
+
+ {_t['label']} · {_t['role']}
+
+
+ North Star: {_t['north_star']}
+
+
+ Arch Nemesis:
+ {_t['nemesis']}
+
+
+ Your 14-Lab Arc
+
+
+
+ """),
+
+ mo.callout(
+ mo.md(
+ f"**Design Ledger initialized** — context: `{_key}`. "
+ "Your deployment regime pre-loads hardware defaults and scenario constraints "
+ "in every lab from Lab 01 onward. Proceed to **Lab 01: ML Introduction**."
+ ),
+ kind="success",
+ ),
+ ])
+ return
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# DESIGN LEDGER HUD (always visible)
+# ─────────────────────────────────────────────────────────────────────────────
+
+@app.cell
+def _(mo, ledger, COLORS):
+ _track = ledger.get_track() or "NONE"
+ _color_map = {
+ "cloud": COLORS["BlueLine"],
+ "edge": COLORS["RedLine"],
+ "mobile": COLORS["OrangeLine"],
+ "tiny": COLORS["GreenLine"],
+ "NONE": "#475569",
+ }
+ _hud_color = _color_map.get(_track, "#475569")
+ _hud_status = "Uninitialized" if _track == "NONE" else "Active — Chapter 0"
+
+ mo.Html(f"""
+
+
🗂️ DESIGN LEDGER
+
+ Context:
+ {_track.upper()}
+
+
+ Chapter:
+ 0
+
+
+ Status:
+ {_hud_status}
+
+
+ """)
return