diff --git a/labs/vol1/__marimo__/session/lab_00_introduction.py.json b/labs/vol1/__marimo__/session/lab_00_introduction.py.json index d4cf73914..39bf98cc3 100644 --- a/labs/vol1/__marimo__/session/lab_00_introduction.py.json +++ b/labs/vol1/__marimo__/session/lab_00_introduction.py.json @@ -6,7 +6,7 @@ "cells": [ { "id": "Hbol", - "code_hash": "d1e137ab08858d9220092662911a6492", + "code_hash": "ddf5f33f9e09f77c352bce879bc06a23", "outputs": [ { "type": "data", @@ -19,12 +19,12 @@ }, { "id": "MJUe", - "code_hash": "33c294b315853e0a7442dadb2c4be01b", + "code_hash": "e7fb484f9c20fb8cdd29e615e072214b", "outputs": [ { "type": "data", "data": { - "text/plain": "" + "text/html": "
\n\n
\n
\n Machine Learning Systems \u00b7 Volume I \u00b7 Lab 00\n
\n

\n The Architect's Portal\n

\n

\n This course is not about machine learning. It is about the infrastructure\n that makes machine learning possible \u2014 and the physical laws that govern it.\n \n

\n \n Orientation \u00b7 3 Concept Checks \u00b7 Interface Tour\n \n \n 20\u201325 min\n \n \n No prior reading required\n \n
\n
" } } ], @@ -32,12 +32,12 @@ }, { "id": "vblA", - "code_hash": "37f6a477d1e84e01fe3f525d912a417b", + "code_hash": "c99e97c8c1fe662b0b310749f4fde94c", "outputs": [ { "type": "data", "data": { - "text/plain": "" + "text/html": "

The 95% Problem

\nWhen Google published a study of their internal ML systems in 2015, they found\nsomething that surprised the field. In a production ML system, the actual model \u2014\nthe neural network, the training algorithm, the matrix math \u2014 accounts for roughly\n5% of the total codebase.\nThe other 95% is infrastructure: data pipelines, serving systems, monitoring,\nhardware resource management, configuration, feature stores, deployment tooling.\nThis has a direct implication for how you should think about your role as an engineer:
\n
\n
\n
\n ML Engineering\n
\n
\n Build and improve the model. Choose the architecture.\n Tune hyperparameters. Improve accuracy.

\n Optimizes the 5%.\n
\n
\n
\n
\n ML Systems Engineering\n
\n
\n Build the infrastructure that makes the model run reliably\n at scale, within hardware constraints, in production.

\n Optimizes the 95%.\n
\n
\n
\n A model that achieves 99% accuracy in a Jupyter notebook is not a product.\nIt becomes a product only when it can run in real-time on real hardware,\nserve thousands of concurrent users, recover from failures, detect when it\ndegrades, and update without downtime. That is the engineering this course teaches.
" } } ], @@ -45,7 +45,7 @@ }, { "id": "bkHC", - "code_hash": "e7a18c8049ed6bfd00fc0fef89bdd96d", + "code_hash": "1bcdf120c15fbde8d591fa469285e078", "outputs": [ { "type": "data", @@ -58,12 +58,12 @@ }, { "id": "lEQa", - "code_hash": "3d14a02aa63fe0fd8cafcc26f1ef8172", + "code_hash": "5233f52c0b3cbb1ccbb35040a0f91a2b", "outputs": [ { "type": "data", "data": { - "text/plain": "" + "text/html": "
\n
\n
\n \u26a0\ufe0f Not quite\n
\n
\n **Not quite.** More training data would help if you were retraining \u2014 but the immediate problem is that you don't even *know* the model is degrading until someone complains. The systems problem is the absence of monitoring. Data collection is part of the solution, but detecting the problem comes first.\n
\n
\n
" } } ], @@ -71,12 +71,12 @@ }, { "id": "PKri", - "code_hash": "63bd617772be04f09aef7b9bd42c6461", + "code_hash": "cda5d32f0c0cb1ffa41f016278e72b82", "outputs": [ { "type": "data", "data": { - "text/plain": "" + "text/html": "

Why Constraints Drive Architecture

\nThe same model cannot simply be \"resized\" to run everywhere.\nThree physical laws carve the deployment landscape into distinct regimes\nthat no amount of software engineering can bridge:
\n
\n\n
\n
\u26a1
\n
\n The Speed of Light\n
\n
\n London to New York = 36 ms minimum round-trip, one-way.\n A self-driving car that needs a 10 ms decision loop\n cannot route to a remote datacenter.\n Physics sets this floor. No GPU upgrade helps.\n
\n
\n\n
\n
\ud83c\udf21\ufe0f
\n
\n Thermodynamics\n
\n
\n Heat accumulates faster than a small enclosure can dissipate it.\n A smartphone running a heavy model continuously\n throttles its processor after 90 seconds.\n No software fix prevents heat.\n
\n
\n\n
\n
\ud83d\udcbe
\n
\n Memory Physics\n
\n
\n Moving data through memory costs energy and takes time.\n A microcontroller with 256 KB of SRAM\n cannot page memory from disk.\n If the model doesn't fit, it doesn't run.\n
\n
\n\n
\n These three constraints \u2014 latency floors, power limits, and memory capacity \u2014\ndivide the world into four fundamentally different deployment environments.\nEngineers who treat deployment as an afterthought collide with these walls\nafter months of architectural work.\nThe insight of ML Systems engineering: choose your regime first,\nbecause the physics of that regime constrains every design decision that follows.
" } } ], @@ -84,7 +84,7 @@ }, { "id": "Xref", - "code_hash": "36b13dca6d552ab03eb36816b0cb2cad", + "code_hash": "b7e2d50be5fd599276333a173d5a52ac", "outputs": [ { "type": "data", @@ -97,7 +97,7 @@ }, { "id": "SFPL", - "code_hash": "6afaf345be9de9338a6d263ea95c39d2", + "code_hash": "c8eed0bb6c348abf5c520c0b82abbb5c", "outputs": [ { "type": "data", @@ -110,23 +110,120 @@ }, { "id": "BYtC", - "code_hash": "1bda80f2be4d3658e0baa43fbe7ae8c1", + "code_hash": "c5affd7fc479b743727501b988968c2b", + "outputs": [ + { + "type": "data", + "data": { + "text/plain": "" + } + } + ], + "console": [] + }, + { + "id": "RGSE", + "code_hash": "bc63f9af4a914e5e2ce16d1ae16548d6", + "outputs": [ + { + "type": "data", + "data": { + "text/plain": "" + } + } + ], + "console": [] + }, + { + "id": "Kclp", + "code_hash": "94be6867fba4661cf7ad924e093bc5ca", "outputs": [ { "type": "error", - "ename": "exception", - "evalue": "name 'view' is not defined", + "ename": "ancestor-stopped", + "evalue": "This cell wasn't run because an ancestor was stopped with `mo.stop`: ", "traceback": [] } ], - "console": [ + "console": [] + }, + { + "id": "emfo", + "code_hash": "0393b9cb5eaf6dbb04d44c962fd8fcc1", + "outputs": [ { - "type": "stream", - "name": "stderr", - "text": "
Traceback (most recent call last):\n  File "/var/folders/nv/p2yc903d60vbvprnhf4hhvhc0000gq/T/marimo_32436/__marimo__cell_BYtC_.py", line 1, in <module>\n    view\nNameError: name 'view' is not defined\n
\n
", - "mimetype": "application/vnd.marimo+traceback" + "type": "error", + "ename": "ancestor-stopped", + "evalue": "This cell wasn't run because an ancestor was stopped with `mo.stop`: ", + "traceback": [] } - ] + ], + "console": [] + }, + { + "id": "Hstk", + "code_hash": "2d43ec0662d1feb3264fe81613dbd973", + "outputs": [ + { + "type": "error", + "ename": "ancestor-stopped", + "evalue": "This cell wasn't run because an ancestor was stopped with `mo.stop`: ", + "traceback": [] + } + ], + "console": [] + }, + { + "id": "nWHF", + "code_hash": "288b8aeadc385e04249311d6fd71ca10", + "outputs": [ + { + "type": "error", + "ename": "ancestor-stopped", + "evalue": "This cell wasn't run because an ancestor was stopped with `mo.stop`: ", + "traceback": [] + } + ], + "console": [] + }, + { + "id": "iLit", + "code_hash": "1d3104cde642f65c3f442f3eacd64162", + "outputs": [ + { + "type": "error", + "ename": "ancestor-stopped", + "evalue": "This cell wasn't run because an ancestor was stopped with `mo.stop`: ", + "traceback": [] + } + ], + "console": [] + }, + { + "id": "ZHCJ", + "code_hash": "aea079cd3a2333ee1fad70908380fd0a", + "outputs": [ + { + "type": "error", + "ename": "ancestor-stopped", + "evalue": "This cell wasn't run because an ancestor was stopped with `mo.stop`: ", + "traceback": [] + } + ], + "console": [] + }, + { + "id": "ROlb", + "code_hash": "d2f0420508045e548e25db5b3fb08abd", + "outputs": [ + { + "type": "data", + "data": { + "text/html": "\n
\n
\ud83d\uddc2\ufe0f DESIGN LEDGER
\n
\n Context: \n EDGE\n
\n
\n Chapter: \n 0\n
\n
\n Status: \n Active \u2014 Chapter 0\n
\n
\n" + } + } + ], + "console": [] } ] } \ No newline at end of file diff --git a/labs/vol1/lab_00_introduction.py b/labs/vol1/lab_00_introduction.py index 9e5b4fa5b..e412793d0 100644 --- a/labs/vol1/lab_00_introduction.py +++ b/labs/vol1/lab_00_introduction.py @@ -3,206 +3,1154 @@ import marimo __generated_with = "0.19.6" app = marimo.App(width="full") +# ───────────────────────────────────────────────────────────────────────────── +# LAB 00: THE ARCHITECT'S PORTAL +# +# This is an ML Systems lab — not an ML lab. +# Students are not here to learn how models work. +# They are here to understand that where a model runs +# determines whether it can run at all. +# +# Four sections: +# 1. Concept blocks with structured checks (3 total) +# 2. Interface Orientation — cockpit anatomy, live levers, prediction lock, MathPeek +# No physics instruments (introduced in Lab 01+). +# No prediction locks in anger (students haven't read Chapter 1 yet). +# Progressive disclosure: each check gates the next concept. +# +# Concepts covered (all from pre-reading context, no chapter required): +# 1. The 95% Problem — ML systems ≠ ML models +# 2. Physical constraints partition deployment into distinct regimes +# 3. Constraints are immovable — the choice of regime is the architecture +# 4. UI scaffolding — every recurring component demonstrated before Lab 01 +# +# Design Ledger: initialized with deployment context at completion. 
+# ───────────────────────────────────────────────────────────────────────────── + @app.cell def _(): import marimo as mo import sys - import os from pathlib import Path - # Robust path finding: find the repo root relative to this file _root = Path(__file__).resolve().parents[2] if str(_root) not in sys.path: sys.path.insert(0, str(_root)) - from mlsysim.sim import Personas - from mlsysim.viz import dashboard - return Personas, dashboard, mo + from labs.core.state import DesignLedger + from labs.core.style import COLORS, LAB_CSS + ledger = DesignLedger() + return mo, ledger, COLORS, LAB_CSS + + +# ───────────────────────────────────────────────────────────────────────────── +# HEADER +# ───────────────────────────────────────────────────────────────────────────── @app.cell -def _(): - # ZONE 0: THE HOOK +def _(mo, LAB_CSS): + mo.vstack([ + LAB_CSS, + mo.md(""" +
+
+ Machine Learning Systems · Volume I · Lab 00 +
+

+ The Architect's Portal +

+

+ This course is not about machine learning. It is about the infrastructure + that makes machine learning possible — and the physical laws that govern it. +

+
+ + Orientation · 3 Concept Checks · Interface Tour + + + 20–25 min + + + No prior reading required + +
+
+ """), + ]) return +# ───────────────────────────────────────────────────────────────────────────── +# CONCEPT 1: THE 95% PROBLEM +# ───────────────────────────────────────────────────────────────────────────── + @app.cell def _(mo): - # ZONE 1: THE IDENTITY - persona_selector = mo.ui.dropdown( - options={ - "Cloud Titan": "cloud", - "Edge Guardian": "edge", - "Mobile Nomad": "mobile", - "Tiny Pioneer": "tiny" - }, - value="Cloud Titan", - label="🚀 STEP 1: SELECT YOUR CAREER TRACK" - ) - return (persona_selector,) + mo.vstack([ + mo.md("---"), + mo.md(""" + ## The 95% Problem + When Google published a study of their internal ML systems in 2015, they found + something that surprised the field. In a production ML system, the actual model — + the neural network, the training algorithm, the matrix math — accounts for roughly + **5% of the total codebase**. -@app.cell -def _(Personas, persona_selector): - # BRAIN: STORY & PHYSICS - _key = persona_selector.value - persona = Personas.get(_key) + The other **95%** is infrastructure: data pipelines, serving systems, monitoring, + hardware resource management, configuration, feature stores, deployment tooling. - _stories = { - "cloud": { - "title": "The Exaflop Factory", - "img": "🏭", - "narrative": "You are the infrastructure lead for a global LLM. You manage 100,000 GPUs. Your enemies are Electricity Bills and Grid Stability. A 1% increase in MFU saves enough energy to power a small city.", - "wall": "The Power Wall", - "expect": "Vol 2: Fleet Orchestration" - }, - "edge": { - "title": "The Zero-Collision Loop", - "img": "🏎️", - "narrative": "You are the safety engineer for an Autonomous Vehicle fleet. Your model must detect a pedestrian and brake within 10ms. If the network jitters, the car stops. 
Safety is your only constraint.", - "wall": "The Light Barrier", - "expect": "Vol 2: Deterministic Latency" - }, - "mobile": { - "title": "The Palm-Sized Assistant", - "img": "📱", - "narrative": "Your vision app runs on 100 million smartphones. If you use too much NPU power, the phone gets hot and throttles. You must fit 'Intelligence' into a literal thermal pocket.", - "wall": "The Thermal Wall", - "expect": "Vol 1: Model Compression" - }, - "tiny": { - "title": "The Smart Doorbell", - "img": "🔔", - "narrative": "You are hacking on a $2 microcontroller with 256KB of memory. It runs on a battery for a year. You are fighting for every single byte and microwatt.", - "wall": "The Memory Wall", - "expect": "Vol 1: TinyML Physics" - } - } - - story = _stories.get(_key) - _budgets = {"cloud": 95, "edge": 40, "mobile": 25, "tiny": 8} - budget = _budgets.get(_key) - return budget, persona, story - - -@app.cell -def _(mo, persona, story): - # ZONE 2: THE BRIEFING - story_card = mo.md( - f""" -
-
- {story['img']} -
-

{story['title']}

-

Current Persona: {persona.name}

+ This has a direct implication for how you should think about your role as an engineer: + """), + mo.Html(""" +
+
+
+ ML Engineering +
+
+ Build and improve the model. Choose the architecture. + Tune hyperparameters. Improve accuracy.

+ Optimizes the 5%.
-
-

{story['narrative']}

-
- ⚠️ Primary Barrier: {story['wall']} - 🎯 Track Focus: {story['expect']} +
+
+ ML Systems Engineering +
+
+ Build the infrastructure that makes the model run reliably + at scale, within hardware constraints, in production.

+ Optimizes the 95%.
- """ - ) - return (story_card,) + """), + mo.md(""" + A model that achieves 99% accuracy in a Jupyter notebook is **not a product**. + It becomes a product only when it can run in real-time on real hardware, + serve thousands of concurrent users, recover from failures, detect when it + degrades, and update without downtime. That is the engineering this course teaches. + """), + ]) + return +# ───────────────────────────────────────────────────────────────────────────── +# CHECK 1 +# ───────────────────────────────────────────────────────────────────────────── + @app.cell def _(mo): - complexity_slider = mo.ui.slider( - start=1, stop=100, step=1, value=80, - label="2. ADAPT THE ARCHITECTURE: Set Model Size (%)" + check1 = mo.ui.radio( + options={ + "A) The model architecture — choosing transformers over CNNs": "A", + "B) The training algorithm — selecting Adam vs SGD": "B", + "C) The serving infrastructure — how the model runs reliably in production": "C", + "D) The dataset size — gathering more labeled training examples": "D", + }, + label="""**Check your understanding.** A startup ships a model with 94% accuracy. +Six months later, accuracy has silently dropped to 81% in production — but no code +has changed. 
As an ML Systems engineer, which part of the system is your *primary* +domain for diagnosing and fixing this?""", ) - return (complexity_slider,) + return (check1,) @app.cell -def _(budget, complexity_slider, dashboard, persona, story): - # CALCULATE SUCCESS - is_safe = complexity_slider.value <= budget - - header = dashboard.command_header( - title=f"Command Center: {persona.name}", - subtitle=f"Mission: {story['title']} at scale {persona.scale_factor:,.0f} {persona.unit_of_scale}", - persona_name=persona.role, - scale=f"{persona.scale_factor:,.0f} {persona.unit_of_scale}", - constraints={"Physics Compliance": is_safe, "Deployment Ready": is_safe} +def _(mo, check1): + mo.stop( + check1.value is None, + mo.vstack([ + check1, + mo.callout( + mo.md("_Select an answer to continue._"), + kind="warn", + ), + ]) ) - return (is_safe,) - -@app.cell -def _( - complexity_slider, - dashboard, - is_safe, - mo, - persona, - persona_selector, - story_card, -): - # TAB 1: BRIEFING - briefing_content = mo.vstack([ - mo.md("### 🏁 Phase 1: Identity & Context"), - persona_selector, - story_card, - dashboard.pro_note( - "Architect Selection", - "Choose your path. Each role below represents a different scale of the AI Triad." - ) - ]) - - # TAB 2: SIMULATION - simulation_content = mo.vstack([ - mo.md("### 🎛️ Phase 2: The Balancing Act"), - dashboard.pro_note( - "The Crisis at Hand", - f"Your current model complexity is at {complexity_slider.value}%. " + - (f"The Physics Compliance badge is red! You are violating the laws of the {persona.primary_constraint}. Reduce complexity to recover." if not is_safe else "The system is balanced. You are ready for the final audit.") + _correct = check1.value == "C" + _feedback = { + "A": ( + "**Not quite.** The architecture hasn't changed — the model itself is unchanged. " + "The issue is that the *world* changed while the model stayed fixed. 
" + "Model architecture is an ML concern; detecting and responding to drift " + "is a *systems* concern — monitoring, pipelines, retraining triggers." ), - dashboard.layout_cockpit( - dashboard.lever_panel(mo.vstack([ - mo.md("Adjust the lever below to resize your model."), - complexity_slider - ])), - dashboard.telemetry_panel(mo.vstack([ - mo.md(f"#### instrument: {persona.name}"), - mo.md(f"Current Load: {complexity_slider.value}%"), - mo.md(f"Constraint: {persona.primary_constraint}"), - mo.md("---"), - mo.md(f"
{'✅ SYSTEM NOMINAL' if is_safe else '⚠️ PHYSICAL OVERLOAD'}
"), - ]), color="#3182ce" if is_safe else "#e53e3e"), - mo.md("") # Audit trail moved to Tab 3 - ) - ]) + "B": ( + "**Not quite.** The training algorithm only runs during training. " + "Once the model is deployed, SGD vs Adam no longer matters. " + "The degradation happened in production — that's the systems layer: " + "monitoring, data pipelines, serving infrastructure." + ), + "C": ( + "**Correct.** The model hasn't changed — but the world it's operating in has. " + "This is *silent degradation*, one of the defining challenges of ML systems. " + "Your job is not to debug code; it's to build monitoring that detects when " + "production data drifts away from training data, and pipelines that respond. " + "That's the 95%." + ), + "D": ( + "**Not quite.** More training data would help if you were retraining — " + "but the immediate problem is that you don't even *know* the model is degrading " + "until someone complains. The systems problem is the absence of monitoring. " + "Data collection is part of the solution, but detecting the problem comes first." + ), + } - # TAB 3: DEPLOYMENT - deployment_content = mo.vstack([ - mo.md("### 📝 Phase 3: Audit & Deployment"), - dashboard.audit_panel(mo.vstack([ - mo.md("Before we deploy to the fleet, you must provide the engineering rationale for your chosen complexity level."), - mo.ui.text_area(placeholder="Why is this setup optimal for your mission?", label="Justification"), - mo.center(mo.ui.button(label="🚀 Deploy to Fleet", kind="success", disabled=not is_safe)) - ])) - ]) + _color = "#f0fdf4" if _correct else "#fef2f2" + _border = "#16a34a" if _correct else "#ef4444" + _icon = "✅" if _correct else "⚠️" - # ASSEMBLE TABS - tabs = mo.ui.tabs({ - "1. Briefing": briefing_content, - "2. Simulation": simulation_content, - "3. Deployment": deployment_content - }) + mo.vstack([ + check1, + mo.Html(f""" +
+
+ {_icon} {"Correct" if _correct else "Not quite"} +
+
+ {_feedback[check1.value]} +
+
+ """), + ]) + return + + +# ───────────────────────────────────────────────────────────────────────────── +# CONCEPT 2: PHYSICAL CONSTRAINTS PARTITION DEPLOYMENT +# ───────────────────────────────────────────────────────────────────────────── + +@app.cell +def _(mo, check1): + mo.stop(check1.value is None) + + mo.vstack([ + mo.md("---"), + mo.md(""" + ## Why Constraints Drive Architecture + + The same model cannot simply be "resized" to run everywhere. + Three physical laws carve the deployment landscape into distinct regimes + that no amount of software engineering can bridge: + """), + mo.Html(""" +
+ +
+
+
+ The Speed of Light +
+
+ London to New York = 36 ms minimum round-trip. + A self-driving car that needs a 10 ms decision loop + cannot route to a remote datacenter. + Physics sets this floor. No GPU upgrade helps. +
+
+ +
+
🌡️
+
+ Thermodynamics +
+
+ Heat accumulates faster than a small enclosure can dissipate it. + A smartphone running a heavy model continuously + throttles its processor after 90 seconds. + No software fix prevents heat. +
+
+ +
+
💾
+
+ Memory Physics +
+
+ Moving data through memory costs energy and takes time. + A microcontroller with 256 KB of SRAM + cannot page memory from disk. + If the model doesn't fit, it doesn't run. +
+
+ +
+ """), + mo.md(""" + These three constraints — latency floors, power limits, and memory capacity — + divide the world into four fundamentally different deployment environments. + Engineers who treat deployment as an afterthought collide with these walls + after months of architectural work. + + **The insight of ML Systems engineering:** choose your regime *first*, + because the physics of that regime constrains every design decision that follows. + """), + ]) + return + + +# ───────────────────────────────────────────────────────────────────────────── +# CHECK 2 (multi-select — more than one correct answer) +# ───────────────────────────────────────────────────────────────────────────── + +@app.cell +def _(mo, check1): + mo.stop(check1.value is None) + + check2 = mo.ui.multiselect( + options={ + "Use a smaller model with fewer parameters": "model_size", + "Apply INT8 quantization to reduce precision": "quantization", + "Move the datacenter server physically closer": "move_server", + "Use a faster GPU with higher TFLOPS": "faster_gpu", + "Deploy the model directly on the vehicle": "edge_deploy", + }, + label="""**Check your understanding.** An autonomous vehicle perception system +is routed to a cloud datacenter 2,000 km away. Round-trip latency is 40 ms. +The safety requirement is a 10 ms end-to-end decision loop. 
+ +Select **all approaches** that could actually solve the latency problem:""", + ) + return (check2,) + + +@app.cell +def _(mo, check1, check2): + mo.stop(check1.value is None or len(check2.value) == 0) + + _correct_set = {"move_server", "edge_deploy"} + _selected = set(check2.value) + _exactly_right = _selected == _correct_set + _has_wrong = bool(_selected - _correct_set) + _missing_right = bool(_correct_set - _selected) + + _option_labels = { + "model_size": "Use a smaller model", + "quantization": "Apply INT8 quantization", + "move_server": "Move the server physically closer", + "faster_gpu": "Use a faster GPU", + "edge_deploy": "Deploy on the vehicle", + } + + _rows = "" + for _key, _label in _option_labels.items(): + _is_selected = _key in _selected + _is_correct = _key in _correct_set + if _is_selected and _is_correct: + _icon, _bg, _col = "✅", "#f0fdf4", "#15803d" + elif _is_selected and not _is_correct: + _icon, _bg, _col = "❌", "#fef2f2", "#dc2626" + elif not _is_selected and _is_correct: + _icon, _bg, _col = "◉", "#fffbeb", "#d97706" + else: + _icon, _bg, _col = "○", "#f8fafc", "#94a3b8" + _rows += f""" +
+ {_icon} + + {_label} + +
""" + + _explanation = """ +
+ The physics: Of the 40 ms round trip, at least 20 ms is pure propagation delay: + light in fiber travels at approximately 200,000 km/s and must cover 2,000 km each way. + No software change, no GPU upgrade, no model compression + removes this physical floor.

+ Smaller models and faster GPUs reduce + compute time, but the round-trip latency is dominated by + propagation delay — they don't help.

+ Moving the server physically closer or + deploying directly on the vehicle are the only solutions + because they reduce the distance the signal must travel. + This is why Edge ML exists as a deployment paradigm — not as a preference, + but as a physical necessity. +
+ """ + + _title = "✅ Exactly right." if _exactly_right else ( + "Partially right — review the highlighted options." if not _has_wrong else + "Not quite — some selections add compute speed, not reduce propagation delay." + ) + _border = "#16a34a" if _exactly_right else ("#f59e0b" if not _has_wrong else "#ef4444") + _bg_outer = "#f0fdf4" if _exactly_right else ("#fffbeb" if not _has_wrong else "#fef2f2") + + mo.vstack([ + check2, + mo.Html(f""" +
+
{_title}
+ {_rows} + {_explanation} +
+ """), + ]) + return + + +# ───────────────────────────────────────────────────────────────────────────── +# CONCEPT 3: THE DEPLOYMENT REGIMES +# ───────────────────────────────────────────────────────────────────────────── + +@app.cell +def _(mo, check1, check2): + mo.stop(check1.value is None or len(check2.value) == 0) + + mo.vstack([ + mo.md("---"), + mo.md(""" + ## The Four Physical Regimes + + The physical constraints above don't create a continuum — they create + **four distinct operating envelopes**, each demanding different infrastructure, + different optimization strategies, and different definitions of "correct." + """), + mo.Html(""" +
+ +
+
+ ☁️ +
+
Cloud ML
+
+ Binding constraint: Memory Bandwidth Wall +
+
+
+
+ Virtually unlimited compute and storage. The binding constraint + is not processing power — it is how fast data can move from + memory to compute cores. Most large models are memory-bandwidth-bound, + not compute-bound. +
+
+ Latency: 100–500 ms · Power: kilowatts · Memory: terabytes +
+
+ +
+
+ 🤖 +
+
Edge ML
+
+ Binding constraint: Latency Determinism Wall +
+
+
+
+ Computation happens near the data source — factory floors, + vehicles, hospitals. The binding constraint is not average latency + but tail latency: a single spike in a safety-critical system + is a failure, not a statistic. +
+
+ Latency: 10–100 ms · Power: watts–tens of watts · Memory: gigabytes +
+
+ +
+
+ 📱 +
+
Mobile ML
+
+ Binding constraint: Thermal Power Wall +
+
+
+
+ Intelligence runs directly on consumer devices. Compute capability + is substantial, but sustained operation is limited by heat + accumulation in a sealed, handheld enclosure. After thermal + throttling, performance drops by 30–70%. +
+
+ Latency: 5–50 ms · Power: 3–5 W sustained · Memory: 4–16 GB +
+
+ +
+
+ 👂 +
+
TinyML
+
+ Binding constraint: SRAM Capacity Wall +
+
+
+
+ Always-on intelligence in microcontrollers running on + coin-cell batteries. There is no operating system, no virtual + memory, no paging. If the model does not fit in 256 KB of SRAM, + it does not run. Every byte is a resource allocation decision. +
+
+ Latency: 1–10 ms · Power: microwatts–milliwatts · Memory: kilobytes +
+
+ +
+ """), + mo.callout( + mo.md( + "**Nine orders of magnitude** separate the largest cloud deployment " + "(megawatts, terabytes) from the smallest TinyML device (microwatts, kilobytes). " + "The engineering principles that govern one end of this spectrum " + "do not transfer to the other. This is why ML Systems is a discipline, " + "not a configuration setting." + ), + kind="info", + ), + ]) + return + + +# ───────────────────────────────────────────────────────────────────────────── +# CHECK 3 (ranking via radio — forces active reasoning about trade-offs) +# ───────────────────────────────────────────────────────────────────────────── + +@app.cell +def _(mo, check1, check2): + mo.stop(check1.value is None or len(check2.value) == 0) + + check3 = mo.ui.radio( + options={ + "A) Cloud ML — access to the most compute": "A", + "B) Edge ML — low latency and local processing": "B", + "C) Mobile ML — runs on the patient's own device": "C", + "D) TinyML — lowest power, can run for months on a battery": "D", + }, + label="""**Check your understanding.** A hospital wants to deploy an AI system +that detects sepsis from ICU sensor readings. Requirements: results within 2 ms of +each sensor reading, no patient data can leave the hospital network, and the sensor +node must run for 6 months on a small battery without replacement. + +Which deployment paradigm is the *only* one that satisfies all three requirements simultaneously?""", + ) + return (check3,) + + +@app.cell +def _(mo, check1, check2, check3): + mo.stop(check1.value is None or len(check2.value) == 0 or check3.value is None) + + _correct = check3.value == "D" + _feedback = { + "A": ( + "**Not quite.** Cloud ML violates two of the three requirements. " + "Round-trip latency to a cloud server is 10–500 ms — far above the 2 ms requirement. " + "And patient data would leave the hospital network, violating the privacy constraint. " + "Cloud gives you power, but power is not the binding constraint here." 
+ ), + "B": ( + "**Closer, but not sufficient.** Edge ML achieves low latency and local processing, " + "satisfying the first two requirements. But an edge server draws tens of watts " + "continuously — it cannot run for 6 months on a small battery. " + "The power constraint eliminates it. Edge is right for latency; wrong for energy." + ), + "C": ( + "**Not quite.** Mobile ML runs locally (satisfying privacy) and can meet the " + "latency target, but sustained operation at smartphone-level power draws " + "3–5 W. A small sensor battery would last hours, not months. " + "The energy envelope makes mobile ML infeasible for always-on sensing." + ), + "D": ( + "**Correct.** TinyML is the only paradigm that satisfies all three simultaneously. " + "Inference happens directly on the sensor node — no network latency, no data " + "leaving the hospital. Microcontrollers running at microwatts can sustain " + "always-on sensing for months on a coin-cell battery. " + "The model must fit in kilobytes — that is the engineering challenge this regime imposes. " + "Notice: this was not a software preference. It was a constraint analysis." + ), + } + + _color = "#f0fdf4" if _correct else "#fef2f2" + _border = "#16a34a" if _correct else "#ef4444" + _icon = "✅" if _correct else "⚠️" + + mo.vstack([ + check3, + mo.Html(f""" +
+
+ {_icon} {"Correct" if _correct else "Not quite"} +
+
+ {_feedback[check3.value]} +
+
+ """), + ]) + return + + +# ───────────────────────────────────────────────────────────────────────────── +# SECTION 4: LAB INTERFACE ORIENTATION +# Shows students the exact UI components they will encounter from Lab 01 onward. +# Zero new physics content — pure scaffolding of look-and-feel. +# ───────────────────────────────────────────────────────────────────────────── + +@app.cell +def _(mo, check1, check2, check3): + mo.stop(check1.value is None or len(check2.value) == 0 or check3.value is None) + + mo.vstack([ + mo.md("---"), + mo.md(""" + ## How Every Lab in This Curriculum Is Structured + + Starting from Lab 01, every lab follows the same **four-zone cockpit layout**. + This is not aesthetic — it is a deliberate information architecture that + separates *what you control* from *what the system tells you*. + + Before you begin Lab 01, spend two minutes with the interactive tour below. + You will recognize every element the moment you see it. + """), + ]) return @app.cell -def _(view): - view +def _(mo, check1, check2, check3, COLORS): + mo.stop(check1.value is None or len(check2.value) == 0 or check3.value is None) + + # ── ZONE ANATOMY DIAGRAM ───────────────────────────────────────── + _zone_html = """ +
+ +
+
+ Zone 1 · Command Header +
+
+ Lab number, scenario title, your active persona, and live + constraint badges (Latency, Power, Memory). + Badges turn red the moment you violate a constraint — + the system doesn't wait to tell you. Always visible. +
+
+ +
+
+ Zone 2 · Engineering Levers +
+
+ Sliders, dropdowns, and toggles that modify your design — + hardware target, batch size, precision, model variant. + Every change recalculates everything instantly. + No "Submit" button. The lab reacts in real-time. +
+
+ +
+
+ Zone 3 · Live Telemetry +
+
+ Metric cards, Roofline chart (from Lab 11), Latency Waterfall + (from Lab 02). All charts update as you move sliders. + Your job is to read these instruments and trace cause to effect. +
+
+ +
+
+ Zone 4 · Audit Trail +
+
+ Consequence log, explanatory text, and a free-form rationale box. + Explain your design decision in writing before + submitting. The act of articulating trade-offs is the learning — + not the number the simulator returns. +
+
+ +
+ """ + + # ── LIVE COMPONENT TOUR via mo.ui.tabs ──────────────────────────── + _tab_overview = mo.vstack([ + mo.md(""" + **`mo.ui.tabs`** — labs with multiple acts use tab navigation. + Each tab is a self-contained section. You are looking at a live example right now. + + In later labs, tabs structure the workflow: + ``` + Act I: Baseline → establish the initial state + Act II: Intervention → apply an optimization + ``` + The tab structure ensures you *commit* to a baseline before modifying it. + This is not UX convenience — it enforces the scientific method: measure before you change. + """), + mo.callout( + mo.md("Switch between tabs above to navigate. Your work in each tab is preserved independently."), + kind="info" + ), + ]) + + _tab_levers = mo.vstack([ + mo.md("**Zone 2 levers** update the system state reactively. Here is a live example:"), + mo.hstack([ + mo.vstack([ + mo.md("**Hardware target**"), + mo.ui.dropdown( + options=["H100 (Cloud)", "Jetson Orin NX (Edge)", "Smartphone NPU (Mobile)", "Cortex-M7 (TinyML)"], + value="H100 (Cloud)", + label="Select hardware:" + ), + mo.md("**Batch size**"), + mo.ui.slider(start=1, stop=128, step=1, value=32, label="Batch size:"), + ], gap=1), + mo.Html(f""" +
+
Live Telemetry Preview
+
+ Latency: 12.4 ms
+ Throughput: 2,580 tok/s
+ Memory: 34.2 GB
+ MFU: 47% +
+
+ In real labs these numbers
update as you move sliders. +
+
+ """), + ], gap=2, justify="start"), + mo.callout( + mo.md("**Key insight:** Every lever connects to every metric. Changing batch size affects memory, which affects throughput, which affects cost. The cockpit shows all effects simultaneously."), + kind="warn", + ), + ]) + + _tab_prediction = mo.vstack([ + mo.md(""" + **The Prediction Lock** — the most important component in the curriculum. + + Before every Act in Labs 01–14, you will see a **Prediction Lock** like the one below. + You must commit to a prediction *before* you can run the simulation. + """), + mo.Html(""" +
+
+ 🔒 Prediction Lock — Act I +
+
+ Scenario: You double the batch size from 32 to 64 on an H100. + The model is memory-bandwidth-bound.

+ Predict: Will throughput (tokens/second) increase, + decrease, or stay approximately the same? +
+
+
+ A) Increase proportionally (~2×) +
+
+ B) Increase sub-linearly +
+
+ C) Stay the same +
+
+
+ ↑ In a real lab, selecting an answer here unlocks the simulation instruments below. +
+
+ """), + mo.md(""" + **Why this matters:** Research on deliberate practice shows that making an + explicit prediction before observing a result dramatically increases retention. + If your prediction is wrong, you experience *productive failure* — the gap + between expectation and observation drives deeper encoding than passive reading. + + The prediction lock is not a gatekeeping mechanism. It is a learning amplifier. + """), + ]) + + _tab_mathpeek = mo.vstack([ + mo.md(""" + **`MathPeek` accordion** — the invariant behind every instrument. + + Every chart and metric in the telemetry panel connects to a physical equation. + The MathPeek accordion surfaces that equation on demand — you are never just + moving sliders, you are probing the underlying physics. + """), + mo.accordion({ + "📐 View the Invariant — Iron Law of ML Systems (Preview)": mo.md(""" + **Formula:** `T = D/BW + O/R + L` + + **Components:** + - **T** — Total end-to-end latency (seconds) + - **D** — Data size (bytes moved across memory hierarchy) + - **BW** — Memory bandwidth (bytes/second) + - **O** — FLOPs required (floating-point operations) + - **R** — Compute rate (FLOPs/second, hardware peak × MFU) + - **L** — Fixed overhead latency (dispatch tax, network RTT) + + _This equation is the central object of the entire curriculum. + You will encounter it in every lab. Open this accordion whenever + you need to re-anchor a number to first principles._ + """), + }), + mo.callout( + mo.md("**Lab 01** introduces the Iron Law formally. For now, recognize the accordion — it lives in every lab."), + kind="info", + ), + ]) + + _tour_tabs = mo.ui.tabs({ + "🏗️ Cockpit Anatomy": _tab_overview, + "🎛️ Live Levers": _tab_levers, + "🔒 Prediction Lock": _tab_prediction, + "📐 MathPeek": _tab_mathpeek, + }) + + mo.vstack([ + _tour_tabs, + mo.Html(""" +
+
+
+ Interface orientation complete. + You now recognize the four-zone cockpit, the live lever pattern, the + prediction lock, and the MathPeek accordion. These are the only UI + primitives used across all 14 labs — nothing new will be introduced + without explanation. +
+
+ """), + ]) + return + + +# ───────────────────────────────────────────────────────────────────────────── +# DEPLOYMENT CONTEXT SELECTION +# ───────────────────────────────────────────────────────────────────────────── + +@app.cell +def _(mo, check1, check2, check3): + mo.stop( + check1.value is None or len(check2.value) == 0 or check3.value is None, + mo.md("_Complete all three checks above to unlock your deployment context selection._") + ) + + mo.vstack([ + mo.md("---"), + mo.md(""" + ## Choose Your Physical Regime + + You have now seen why deployment context is a first-order engineering decision, + not an afterthought. For the next 15 labs, you will carry one deployment context + as your primary lens — the physical regime whose constraints will test every + optimization technique you learn. + + **This is not a career choice.** It is a choice of which physical law will + be your primary adversary. You will understand all four regimes — + but you will develop deep intuition for one. + """), + ]) + return + + +@app.cell +def _(mo, check1, check2, check3): + mo.stop(check1.value is None or len(check2.value) == 0 or check3.value is None) + + context_selector = mo.ui.radio( + options={ + "☁️ Cloud ML — your constraint is the Memory Bandwidth Wall": "cloud", + "🤖 Edge ML — your constraint is the Latency Determinism Wall": "edge", + "📱 Mobile ML — your constraint is the Thermal Power Wall": "mobile", + "👂 TinyML — your constraint is the SRAM Capacity Wall": "tiny", + }, + label="Select the deployment regime you will focus on throughout this curriculum:", + ) + return (context_selector,) + + +# ───────────────────────────────────────────────────────────────────────────── +# CONTEXT REVEAL + STAKEHOLDER MESSAGE + LEDGER INIT +# ───────────────────────────────────────────────────────────────────────────── + +@app.cell +def _(mo, check1, check2, check3, context_selector, ledger, COLORS): + mo.stop( + check1.value is None + or len(check2.value) == 0 + or check3.value is 
None + or context_selector.value is None, + mo.vstack([ + context_selector, + mo.md("_Select your deployment context above._"), + ]) + ) + + _key = context_selector.value + _contexts = { + "cloud": { + "color": COLORS["BlueLine"], + "bg": COLORS["BlueL"], + "label": "Cloud ML", + "nemesis": "Memory Bandwidth Wall", + "role": "LLM Infrastructure Lead", + "north_star":"Maximize sustained serving throughput for a 70B-parameter model on a multi-GPU cluster.", + "persona": "Your CTO", + "quote": ( + "We're burning $40,000 a day on GPU rentals. " + "If hardware utilization doesn't hit 50% by next quarter, " + "we run out of runway. The model is fine. The infrastructure is not. Fix it." + ), + "arc": [ + ("Labs 01–04", "Foundations", + "Learn the D·A·M taxonomy, the Iron Law, and why the Memory Wall is your primary constraint"), + ("Labs 05–08", "Build", + "Trace memory allocation through a transformer forward pass; profile your serving stack"), + ("Labs 09–11", "Optimize", + "Apply quantization, understand hardware utilization, and cross the efficiency threshold"), + ("Labs 12–14", "Deploy", + "Benchmark, monitor, and operate a production serving system at scale"), + ], + }, + "edge": { + "color": COLORS["RedLine"], + "bg": COLORS["RedL"], + "label": "Edge ML", + "nemesis": "Latency Determinism Wall", + "role": "Autonomous Systems Lead", + "north_star":"Maintain a deterministic 10 ms perception-to-decision loop on a Jetson Orin NX.", + "persona": "Your Safety Director", + "quote": ( + "A 5 ms latency spike added 15 cm of stopping distance at 60 mph. " + "That is a regulatory failure. I do not care about your average latency. " + "One tail event is one too many. Zero tolerance." 
+ ), + "arc": [ + ("Labs 01–04", "Foundations", + "Understand latency decomposition, the Iron Law, and why P99 is the only metric that matters"), + ("Labs 05–08", "Build", + "Implement a priority scheduler; measure the tail-latency distribution of your inference stack"), + ("Labs 09–11", "Optimize", + "Apply structured pruning to reduce worst-case latency below the safety threshold"), + ("Labs 12–14", "Deploy", + "Validate deterministic SLAs on physical edge hardware under adversarial load"), + ], + }, + "mobile": { + "color": COLORS["OrangeLine"], + "bg": COLORS["OrangeL"], + "label": "Mobile ML", + "nemesis": "Thermal Power Wall", + "role": "Smartphone App Architect", + "north_star":"Run 60 FPS real-time on-device inference within a 2 W sustained thermal envelope.", + "persona": "Your UX Director", + "quote": ( + "Users are returning the device because it heats up after two minutes of AR. " + "You have 2 Watts of sustained thermal headroom. Not 2.1. Two. " + "Every watt you save is a feature." + ), + "arc": [ + ("Labs 01–04", "Foundations", + "Map the D·A·M trade-off for mobile NPUs; quantify the thermal budget"), + ("Labs 05–08", "Build", + "Implement MobileNetV2 with depthwise separable convolutions in TinyTorch"), + ("Labs 09–11", "Optimize", + "Apply INT8 quantization and operator fusion to stay within the thermal envelope"), + ("Labs 12–14", "Deploy", + "Benchmark sustained throughput on a power-constrained device under realistic workloads"), + ], + }, + "tiny": { + "color": COLORS["GreenLine"], + "bg": COLORS["GreenL"], + "label": "TinyML", + "nemesis": "SRAM Capacity Wall", + "role": "TinyML / Embedded Systems Lead", + "north_star":"Fit real-time keyword spotting in under 256 KB SRAM, running under 1 mW.", + "persona": "Your Hardware Lead", + "quote": ( + "We have 256 KB of on-chip SRAM. Every weight byte you keep " + "is audio buffer you lose. There is no paging. There is no swap. " + "If it does not fit, it does not run." 
+ ), + "arc": [ + ("Labs 01–04", "Foundations", + "Count every byte in a DS-CNN keyword spotting model; understand SRAM allocation"), + ("Labs 05–08", "Build", + "Implement depthwise separable convolutions in TinyTorch; profile memory layout"), + ("Labs 09–11", "Optimize", + "Achieve 4× compression via magnitude pruning and INT8 quantization"), + ("Labs 12–14", "Deploy", + "Fit the full inference pipeline in 256 KB and validate on a physical MCU"), + ], + }, + } + + _t = _contexts[_key] + + # Persist to Design Ledger + ledger.save(chapter=0, design={ + "deployment_context": _key, + "check1_answer": check1.value, + "check1_correct": check1.value == "C", + "check2_selections": list(check2.value), + "check3_answer": check3.value, + "check3_correct": check3.value == "D", + }) + + _arc_rows = "".join([ + f""" + {phase} + {label} + {desc} + """ + for phase, label, desc in _t["arc"] + ]) + + mo.vstack([ + context_selector, + mo.md("---"), + + # Stakeholder message + mo.Html(f""" +
+
+ Incoming Message · {_t['persona']} +
+
+ "{_t['quote']}" +
+
+ """), + + # Mission card + mo.Html(f""" +
+
+ 🎖️ Deployment Context Confirmed +
+
+ {_t['label']} · {_t['role']} +
+
+ North Star: {_t['north_star']} +
+
+ Arch Nemesis: + {_t['nemesis']} +
+
+ Your 14-Lab Arc +
+ + {_arc_rows} +
+
+ """), + + mo.callout( + mo.md( + f"**Design Ledger initialized** — context: `{_key}`. " + "Your deployment regime pre-loads hardware defaults and scenario constraints " + "in every lab from Lab 01 onward. Proceed to **Lab 01: ML Introduction**." + ), + kind="success", + ), + ]) + return + + +# ───────────────────────────────────────────────────────────────────────────── +# DESIGN LEDGER HUD (always visible) +# ───────────────────────────────────────────────────────────────────────────── + +@app.cell +def _(mo, ledger, COLORS): + _track = ledger.get_track() or "NONE" + _color_map = { + "cloud": COLORS["BlueLine"], + "edge": COLORS["RedLine"], + "mobile": COLORS["OrangeLine"], + "tiny": COLORS["GreenLine"], + "NONE": "#475569", + } + _hud_color = _color_map.get(_track, "#475569") + _hud_status = "Uninitialized" if _track == "NONE" else "Active — Chapter 0" + + mo.Html(f""" +
+
🗂️ DESIGN LEDGER
+
+ Context: + {_track.upper()} +
+
+ Chapter: + 0 +
+
+ Status: + {_hud_status} +
+
+ """) return