Files
cs249r_book/periodic-table/table.yml
Vijay Janapa Reddi c2689608b8 fix(staffml): correct simulator memory/FLOP formulas + dedup periodic table
simulator/page.tsx:
  - Activation memory: replace 2*L*H^2*B*S formula with selective-recompute
    10*L*B*S*H formula (more accurate for modern training stacks).
  - FLOPs per iter: remove the spurious 3x multiplier on fwd+bwd. The
    flops_per_token figures in MODEL_CONFIGS already account for fwd+bwd.

issue-url.ts:
  - buildContributeUrl() now accepts an optional customBody argument so
    contribute page can pass an exported markdown body.

contribute/page.tsx:
  - Pass exportAsGitHubBody() to buildContributeUrl() so the GitHub issue
    pre-fills with the contributor's drafted question.

periodicTable.ts + periodic-table/table.yml:
  - Remove duplicate 'Knowledge Distillation' and 'Systolic Array (TPU
    Core)' entries under Efficiency & Optimization section.

__tests__/simulator-logic.test.ts: new tests asserting the corrected
memory + compute formulas against canonical model+hardware combos.
2026-04-22 17:10:03 -04:00

1114 lines
32 KiB
YAML

# Periodic Table of ML Systems — single source of truth.
# Initially generated from periodic-table/index.html by
# scripts/migrate-html-to-yaml.mjs; this YAML is now the canonical copy.
# Edit this file directly. Run `make all` (in periodic-table/) to regenerate
# index.html and the StaffML React data file from this YAML.
version: "0.2"
title: "The Periodic Table of Machine Learning Systems"
subtitle: "Two fundamental axes — abstraction layer and information-processing role — organize ML concepts the way electron shells and valence organize chemistry."
# ─── Blocks (information-processing roles, columns) ───────────
blocks:
# One entry per information-processing role. `key` is the code that element
# records carry in their `block:` field; `cols` lists the table columns
# assigned to that role.
- key: R
  name: "Represent"
  sub: "What holds information"
  color: "#42a5f5"
  cols: [1, 2, 3, 4]
- key: C
  name: "Compute"
  sub: "What transforms"
  color: "#ef6c00"
  cols: [5, 6, 7, 8, 9]
- key: X
  name: "Communicate"
  sub: "What moves"
  color: "#26a69a"
  cols: [10, 11, 12]
- key: K
  name: "Control"
  sub: "What decides"
  color: "#fdd835"
  cols: [13, 14, 15, 16]
- key: M
  name: "Measure"
  sub: "What observes"
  color: "#78909c"
  cols: [17, 18]
# ─── Rows (abstraction layers) ────────────────────────────────
rows:
# One entry per abstraction layer. Element records refer to a layer through
# their numeric `row:` field, which uses the same numbering as `index`.
# NOTE(review): the elements' `why:` strings number layers from 0
# (e.g. "Row 0 (Data)", "Row 1 (Math)"), one less than `index` — confirm
# which numbering is intended.
- index: 1
  key: "data"
  name: "Data"
- index: 2
  key: "math"
  name: "Math"
- index: 3
  key: "algorithms"
  name: "Algorithms"
- index: 4
  key: "architecture"
  name: "Architecture"
- index: 5
  key: "optimization"
  name: "Optimization"
- index: 6
  key: "runtime"
  name: "Runtime"
- index: 7
  key: "hardware"
  name: "Hardware"
- index: 8
  key: "production"
  name: "Production"
# ─── Elements (90 total) ─────────────────────────────
elements:
- id: 1
sym: "Tn"
name: "Tensor"
block: R
row: 2
col: 1
year: null
desc: "The fundamental mathematical structure holding information (scalars, vectors, matrices)."
bonds: ["Op", "Cr", "Ob"]
why: "Row 1 (Math): most primitive object. Represent: it IS information."
- id: 2
sym: "Pr"
name: "Probability"
block: R
row: 2
col: 2
year: "1654"
desc: "The mathematical primitive for representing uncertainty — distributions, densities."
bonds: ["Tn", "Dv", "Ob"]
why: "Row 1 (Math): uncertain state. Represent: encodes beliefs."
- id: 3
sym: "Op"
name: "Operator"
block: C
row: 2
col: 5
year: null
desc: "The mathematical action of mapping one space to another (linear or non-linear transforms)."
bonds: ["Tn"]
why: "Row 1 (Math): pure transformation. Compute: transforms spaces."
- id: 4
sym: "Cr"
name: "Chain Rule"
block: X
row: 2
col: 10
year: "1676"
desc: "The fundamental mathematical mechanism that allows composed derivatives to be computed."
bonds: ["Op"]
why: "Row 1 (Math): derivative composition. Communicate: basis for error flow."
- id: 5
sym: "Ob"
name: "Objective"
block: K
row: 2
col: 13
year: null
desc: "The mathematical formulation of the goal (Argmin/Argmax)."
bonds: ["Cr", "Dv"]
why: "Row 1 (Math): the goal state. Control: defines \"better\" or \"worse\"."
- id: 6
sym: "Cs"
name: "Constraint"
block: K
row: 2
col: 14
year: null
desc: "The mathematical primitive for defining bounds and restrictions on variables."
bonds: ["Ob"]
why: "Row 1 (Math): solution space restriction. Control: hard boundaries."
- id: 7
sym: "Dv"
name: "Divergence"
block: M
row: 2
col: 17
year: null
desc: "The mathematical quantification of distance between distributions or tensors (e.g., KL, L2)."
bonds: ["Tn", "Pr"]
why: "Row 1 (Math): information measure. Measure: quantifies difference."
- id: 8
sym: "Pm"
name: "Parameter"
block: R
row: 3
col: 1
year: null
desc: "The irreducible learned memory or state of an algorithm (weights, biases)."
bonds: ["Dd", "Cv", "Gd"]
why: "Row 2 (Algorithm): learned state. Represent: instantiation of math state."
- id: 9
sym: "Eb"
name: "Embedding"
block: R
row: 3
col: 2
year: null
desc: "The fundamental algorithmic act of mapping a discrete symbol into continuous space."
bonds: ["Tn", "Dd"]
why: "Row 2 (Algorithm): discrete-to-continuous mapping."
- id: 10
sym: "Sp"
name: "Sample"
block: R
row: 3
col: 3
year: null
desc: "The irreducible unit of empirical data distribution (a single data point)."
bonds: ["Eb", "Lf"]
why: "Row 2 (Algorithm): data representation. Represent: the input unit."
- id: 11
sym: "Dd"
name: "Dense Dot"
block: C
row: 3
col: 5
year: null
desc: "The irreducible algorithm for fully connected, all-to-all information transformation."
bonds: ["Pm"]
why: "Row 2 (Algorithm): all-to-all transform. Compute."
- id: 12
sym: "Cv"
name: "Convolution"
block: C
row: 3
col: 6
year: null
desc: "The irreducible algorithm for local, weight-shared spatial information transformation."
bonds: ["Pm"]
why: "Row 2 (Algorithm): local transform. Compute."
- id: 13
sym: "Po"
name: "Pooling"
block: C
row: 3
col: 7
year: null
desc: "The algorithmic primitive for spatial or temporal reduction (Max, Average)."
bonds: ["Cv", "Dd"]
why: "Row 2 (Algorithm): primitive operation. Compute."
- id: 14
sym: "Sm"
name: "Sampling"
block: C
row: 3
col: 8
year: null
desc: "The primitive for stochastic selection from a probability distribution."
bonds: ["Pr"]
why: "Row 2 (Algorithm): primitive operation. Compute."
- id: 15
sym: "Ad"
name: "Autodiff"
block: X
row: 3
col: 10
year: "1970"
desc: "The algorithmic primitive that mechanically computes exact derivatives through arbitrary control flow."
bonds: ["Cr", "Pm"]
why: "Row 2 (Algorithm): error routing. Communicate."
- id: 16
sym: "Tk"
name: "Tokenization"
block: X
row: 3
col: 11
year: null
desc: "Segmenting raw input into discrete processing units."
bonds: ["Eb"]
why: "Row 2 (Algorithm): input segmentation. Communicate."
- id: 17
sym: "Gd"
name: "Grad Descent"
block: K
row: 3
col: 13
year: "1847"
desc: "The core control loop: takes communicated gradients and updates Parameters."
bonds: ["Ad", "Pm", "Lf"]
why: "Row 2 (Algorithm): update mechanism. Control."
- id: 18
sym: "Rw"
name: "Reward"
block: K
row: 3
col: 14
year: null
desc: "A scalar control signal evaluating the quality of an action (RL)."
bonds: ["Sp", "Gd"]
why: "Row 2 (Algorithm): evaluative signal. Control."
- id: 19
sym: "Iz"
name: "Initialization"
block: K
row: 3
col: 15
year: null
desc: "The algorithmic control for setting the starting state of parameters."
bonds: ["Pm", "Pr"]
why: "Row 2 (Algorithm): starting state control. Control."
- id: 20
sym: "Lf"
name: "Loss Function"
block: M
row: 3
col: 17
year: null
desc: "The specific algorithmic computation of the mathematical distance (e.g., Cross-Entropy)."
bonds: ["Dv", "Gd"]
why: "Row 2 (Algorithm): algorithmic measure. Measure."
- id: 21
sym: "Tp"
name: "Topology"
block: R
row: 4
col: 1
year: null
desc: "The fundamental structural assumption placed on data (Sequence, Grid, Graph)."
bonds: ["At", "Gt", "Cv"]
why: "Row 3 (Architecture): data structure. Represent."
- id: 22
sym: "Hs"
name: "Hidden State"
block: R
row: 4
col: 2
year: null
desc: "The architectural primitive for persistent intermediate representation."
bonds: ["Fb", "At", "Gt"]
why: "Row 3 (Architecture): structural memory. Represent."
- id: 23
sym: "At"
name: "Attention"
block: C
row: 4
col: 5
year: null
desc: "Letting data dynamically decide which other data it interacts with."
bonds: ["Mk"]
why: "Row 3 (Architecture): dynamic routing. Compute."
- id: 24
sym: "Gt"
name: "Gating"
block: C
row: 4
col: 6
year: null
desc: "Using data to scale or shut off other data (Multiplicative flow)."
bonds: ["Tn"]
why: "Row 3 (Architecture): conditional flow. Compute."
- id: 25
sym: "Nm"
name: "Normalization"
block: C
row: 4
col: 7
year: null
desc: "The transform that re-centers and re-scales data distributions between layers."
bonds: ["Tn", "Pm"]
why: "Row 3 (Architecture): distribution transform. Compute."
- id: 26
sym: "Ro"
name: "Routing"
block: C
row: 4
col: 8
year: null
desc: "Conditional data direction to specific sub-units (e.g., Experts)."
bonds: ["Gt", "Mk", "Dd"]
why: "Row 3 (Architecture): conditional flow. Compute."
- id: 27
sym: "Sk"
name: "Skip/Res"
block: X
row: 4
col: 10
year: null
desc: "The fundamental primitive of identity mapping. Allows information to bypass computation."
bonds: ["Tp"]
why: "Row 3 (Architecture): information highway. Communicate."
- id: 28
sym: "Fb"
name: "Feedback"
block: X
row: 4
col: 11
year: null
desc: "The structural primitive of routing a signal backward in the graph (Recurrence)."
bonds: ["Hs", "Tp"]
why: "Row 3 (Architecture): temporal loop. Communicate."
- id: 29
sym: "Mk"
name: "Masking"
block: K
row: 4
col: 13
year: null
desc: "The structural enforcement of causality or prevention of information leakage."
bonds: ["At", "Tp"]
why: "Row 3 (Architecture): structural constraint. Control."
- id: 30
sym: "Rf"
name: "Receptive Fld"
block: M
row: 4
col: 17
year: null
desc: "The measurement of how far information can travel within the architecture in one pass."
bonds: ["Tp", "At", "Cv"]
why: "Row 3 (Architecture): spatial/temporal reach. Measure."
- id: 31
sym: "Fc"
name: "Factorization"
block: R
row: 5
col: 1
year: null
desc: "Approximating a massive matrix as the product of smaller ones (Low-Rank)."
bonds: ["Pm", "Qz", "Sp"]
why: "Row 4 (Optimization): rank reduction. Represent."
- id: 32
sym: "Os"
name: "Optim State"
block: R
row: 5
col: 2
year: null
desc: "The irreducible memory of the optimization process (momentum, velocity)."
bonds: ["Gd", "Sc", "Pm"]
why: "Row 4 (Optimization): optimization memory. Represent."
- id: 33
sym: "Qz"
name: "Quantization"
block: C
row: 5
col: 5
year: null
desc: "Reducing the bit-width of numbers (FP8, INT4)."
bonds: ["Fc", "Sp", "Ws"]
why: "Row 4 (Optimization): precision reduction. Compute."
- id: 34
sym: "Sp"
name: "Sparsification"
block: C
row: 5
col: 6
year: null
desc: "Turning dense compute sparse by forcing weights or activations to zero."
bonds: ["Fc", "Qz", "Rg"]
why: "Row 4 (Optimization): density reduction. Compute."
- id: 35
sym: "Ws"
name: "Weight Sharing"
block: X
row: 5
col: 10
year: "1980s"
desc: "The structural optimization of communicating the same learned state across multiple functional paths (e.g., CNNs)."
bonds: ["Pm", "Tp"]
why: "Row 4 (Optimization): state reuse. Communicate."
- id: 36
sym: "En"
name: "Ensembling"
block: X
row: 5
col: 11
year: null
desc: "Merging weights or outputs across time/workers to improve generalization (SWA)."
bonds: ["Pm", "Gd", "Ws"]
why: "Row 4 (Optimization): spatial/temporal merging. Communicate."
- id: 37
sym: "Sc"
name: "Scheduling"
block: K
row: 5
col: 13
year: null
desc: "Dynamically decaying or modulating control signals over time."
bonds: ["Gd", "Rg"]
why: "Row 4 (Optimization): dynamic modulation. Control."
- id: 38
sym: "Rg"
name: "Regularization"
block: K
row: 5
col: 14
year: null
desc: "The structural penalty applied to the objective to force simpler solutions."
bonds: ["Sc", "Sp", "Ob"]
why: "Row 4 (Optimization): complexity penalty. Control."
- id: 39
sym: "Tm"
name: "Termination"
block: K
row: 5
col: 15
year: null
desc: "The control primitive that evaluates conditions to halt an iterative optimization loop."
bonds: ["Gd", "Lf"]
why: "Row 4 (Optimization): temporal bound. Control."
- id: 40
sym: "Id"
name: "Info Density"
block: M
row: 5
col: 17
year: null
desc: "The measure of optimization efficiency (Bits per Parameter)."
bonds: ["Qz", "Fc", "Sp"]
why: "Row 4 (Optimization): compression metric. Measure."
- id: 41
sym: "Cc"
name: "Caching"
block: R
row: 6
col: 1
year: null
desc: "Holding intermediate state in fast memory to prevent recomputation (e.g., KV Cache)."
bonds: ["At", "Bt", "Pl"]
why: "Row 5 (Runtime): state persistence. Represent."
- id: 42
sym: "Cp"
name: "Checkpointing"
block: R
row: 6
col: 2
year: null
desc: "Saving and restoring model state for fault tolerance or memory efficiency."
bonds: ["Pm", "As", "Al"]
why: "Row 5 (Runtime): state persistence. Represent."
- id: 43
sym: "Ir"
name: "Int. Rep."
block: R
row: 6
col: 3
year: null
desc: "The software state of a computation graph before hardware execution (ONNX, PT2)."
bonds: ["Cl", "Fs"]
why: "Row 5 (Runtime): structural state. Represent."
- id: 44
sym: "Fs"
name: "Fusion"
block: C
row: 6
col: 5
year: null
desc: "Merging multiple operations into a single execution kernel to minimize memory IO."
bonds: ["Op", "At", "Pl"]
why: "Row 5 (Runtime): op merging. Compute."
- id: 45
sym: "Bt"
name: "Batching"
block: C
row: 6
col: 6
year: null
desc: "Grouping independent inputs for parallel processing."
bonds: ["Cc", "Dd", "Pl"]
why: "Row 5 (Runtime): request grouping. Compute."
- id: 46
sym: "Ti"
name: "Tiling"
block: C
row: 6
col: 7
year: null
desc: "Partitioning computation into sub-blocks to optimize for memory hierarchy."
bonds: ["Ma", "Sr", "Fs"]
why: "Row 5 (Runtime): compute partitioning. Compute."
- id: 47
sym: "Cl"
name: "Compilation"
block: C
row: 6
col: 8
year: null
desc: "Lowering high-level operators into hardware-executable kernels."
bonds: ["Ir", "Fs", "Ti"]
why: "Row 5 (Runtime): graph-to-kernel translation. Compute."
- id: 48
sym: "Pl"
name: "Pipelining"
block: X
row: 6
col: 10
year: null
desc: "Overlapping the execution of sequential stages across different compute units."
bonds: ["Bt", "Sy", "Al"]
why: "Row 5 (Runtime): stage scheduling. Communicate."
- id: 49
sym: "Sy"
name: "Sync / Coll"
block: X
row: 6
col: 11
year: null
desc: "Aggregating and broadcasting state across distributed devices."
bonds: ["Ad", "Gd", "Pl"]
why: "Row 5 (Runtime): gradient/state sync. Communicate."
- id: 50
sym: "Pf"
name: "Prefetching"
block: X
row: 6
col: 12
year: null
desc: "Proactively moving data into faster memory tiers before it is needed."
bonds: ["Ic", "Dr", "Pl"]
why: "Row 5 (Runtime): data anticipation. Communicate."
- id: 51
sym: "Al"
name: "Allocation"
block: K
row: 6
col: 13
year: null
desc: "The dynamic assignment of hardware resources to software tasks."
bonds: ["Cc", "Cp", "Ar"]
why: "Row 5 (Runtime): resource control. Control."
- id: 52
sym: "Ut"
name: "Utilization"
block: M
row: 6
col: 17
year: null
desc: "The percentage of theoretical hardware capacity actively used (MFU)."
bonds: ["Bt", "Fs"]
why: "Row 5 (Runtime): efficiency metric. Measure."
- id: 53
sym: "Sr"
name: "SRAM"
block: R
row: 7
col: 1
year: null
desc: "On-chip, low-capacity, extremely high-bandwidth memory (Registers, Scratchpads)."
bonds: ["Cc", "Ma", "Ic"]
why: "Row 6 (Hardware): fast state. Represent."
- id: 54
sym: "Dr"
name: "DRAM"
block: R
row: 7
col: 2
year: null
desc: "Off-chip, high-capacity, lower-bandwidth memory (HBM, DDR)."
bonds: ["Cp", "Sr", "Ic"]
why: "Row 6 (Hardware): bulk state. Represent."
- id: 55
sym: "Ma"
name: "MAC Unit"
block: C
row: 7
col: 5
year: null
desc: "Multiply-Accumulate unit. The fundamental silicon logic gate for tensor math."
bonds: ["Sr", "Dd", "Vu"]
why: "Row 6 (Hardware): arithmetic logic. Compute."
- id: 56
sym: "Vu"
name: "Vector Unit"
block: C
row: 7
col: 6
year: null
desc: "Single Instruction, Multiple Data (SIMD) ALU. The silicon primitive for parallel arithmetic."
bonds: ["Ma", "Sr"]
why: "Row 6 (Hardware): parallel compute logic. Compute."
- id: 57
sym: "Ic"
name: "Interconnect"
block: X
row: 7
col: 10
year: null
desc: "The physical wiring moving data between silicon components (NoC, PCIe, NVLink)."
bonds: ["Sr", "Dr", "Sy"]
why: "Row 6 (Hardware): device link. Communicate."
- id: 58
sym: "Rt"
name: "HW Router"
block: X
row: 7
col: 11
year: null
desc: "Silicon logic that directs packets across the physical interconnect."
bonds: ["Ic", "Ar"]
why: "Row 6 (Hardware): physical network logic. Communicate."
- id: 59
sym: "Ar"
name: "Arbiter"
block: K
row: 7
col: 13
year: null
desc: "Hardware logic that schedules instructions and manages contention."
bonds: ["Ma", "Ic", "Al"]
why: "Row 6 (Hardware): execution control. Control."
- id: 60
sym: "Ck"
name: "Clock/Sync"
block: K
row: 7
col: 14
year: null
desc: "The hardware primitive for temporal control, synchronization, and barriers."
bonds: ["Ar", "Ma"]
why: "Row 6 (Hardware): temporal control. Control."
- id: 61
sym: "Ew"
name: "Energy"
block: M
row: 7
col: 17
year: null
desc: "The physical power consumed to perform computation (Joules/token)."
bonds: ["Ma", "Dr"]
why: "Row 6 (Hardware): power metric. Measure."
- id: 62
sym: "As"
name: "Artifact Store"
block: R
row: 8
col: 1
year: null
desc: "Durable, distributed storage for trained models and datasets (S3, Model Registry)."
bonds: ["Cp", "Dr", "Ex"]
why: "Row 7 (Production): persistent state. Represent."
- id: 63
sym: "Ex"
name: "Exec Engine"
block: C
row: 8
col: 5
year: null
desc: "The production worker node that executes compiled graphs on incoming requests."
bonds: ["As", "Bt", "Mq"]
why: "Row 7 (Production): execution loop. Compute."
- id: 64
sym: "Rp"
name: "RPC Protocol"
block: X
row: 8
col: 10
year: null
desc: "The synchronous network protocol for moving data between distributed services."
bonds: ["Ex", "Ld", "La"]
why: "Row 7 (Production): sync interface. Communicate."
- id: 65
sym: "Mq"
name: "Msg Queue"
block: X
row: 8
col: 11
year: null
desc: "The asynchronous network primitive for buffering and streaming data (Kafka)."
bonds: ["Ex", "Rp"]
why: "Row 7 (Production): async interface. Communicate."
- id: 66
sym: "Ld"
name: "Load Balancer"
block: K
row: 8
col: 13
year: null
desc: "The fleet-level control unit routing incoming requests to available hardware."
bonds: ["Rp", "Ex", "Oc"]
why: "Row 7 (Production): traffic control. Control."
- id: 67
sym: "Oc"
name: "Orchestrator"
block: K
row: 8
col: 14
year: null
desc: "The fleet-level control plane that scales, restarts, and manages the lifecycle of execution nodes (e.g., K8s)."
bonds: ["Ld", "Av"]
why: "Row 7 (Production): fleet control loop. Control."
- id: 68
sym: "La"
name: "Latency"
block: M
row: 8
col: 17
year: null
desc: "The end-to-end time from user request to final response."
bonds: ["Ex", "Rp"]
why: "Row 7 (Production): time metric. Measure."
- id: 69
sym: "Av"
name: "Availability"
block: M
row: 8
col: 18
year: null
desc: "Service Level Agreement metric measuring uptime and fault tolerance."
bonds: ["La", "Oc"]
why: "Row 7 (Production): reliability metric. Measure."
# ─── Compounds (51 total across 7 sections) ──────────────
# NOTE: this legend documents the `compounds:` key further below; the
# records between this comment and `compounds:` (ids 70–90) are still
# part of `elements:`.
# Formulas are written in the same notation as the paper:
#   Sym      two-letter element reference (resolves to elements[*].sym)
#   _xxx     subscript on the preceding token (e.g. Tk_patch, ]ᴺ_enc)
#   →        sequential composition
#   ∥        parallel
#   ?        conditional
#   ⇌        adversarial
#   ↺        feedback loop
#   [...]ᴺ   repeated block
- id: 70
sym: "Rc"
name: "Record"
block: R
row: 1
col: 1
year: null
desc: "The fundamental atomic unit of raw information (a single row, image, or document)."
bonds: []
why: "Row 0 (Data): the raw state. Represent."
- id: 71
sym: "Ds"
name: "Dataset"
block: R
row: 1
col: 2
year: null
desc: "A structured collection of records."
bonds: ["Rc", "Sm"]
why: "Row 0 (Data): the collective state. Represent."
- id: 72
sym: "Tr"
name: "Transform"
block: C
row: 1
col: 5
year: null
desc: "The deterministic action of altering raw data (cropping, resizing, parsing)."
bonds: ["Rc"]
why: "Row 0 (Data): raw manipulation. Compute."
- id: 73
sym: "Ag"
name: "Aggregate"
block: C
row: 1
col: 6
year: null
desc: "Combining multiple records into summary statistics."
bonds: ["Ds"]
why: "Row 0 (Data): statistical manipulation. Compute."
- id: 74
sym: "Fl"
name: "Flow/Stream"
block: X
row: 1
col: 10
year: null
desc: "The continuous movement of raw data from source to system (ETL, Kafka)."
bonds: ["Rc", "Ds"]
why: "Row 0 (Data): data pipeline. Communicate."
- id: 75
sym: "Fm"
name: "Format"
block: X
row: 1
col: 11
year: null
desc: "The structural encoding of data for storage or transit (Parquet, TFRecord)."
bonds: ["Rc", "Fl"]
why: "Row 0 (Data): serialization. Communicate."
- id: 76
sym: "Fi"
name: "Filter"
block: K
row: 1
col: 13
year: null
desc: "The deterministic logic that includes or excludes records based on predicates."
bonds: ["Rc", "Tr"]
why: "Row 0 (Data): data gating. Control."
- id: 77
sym: "Sm"
name: "Schema"
block: K
row: 1
col: 14
year: null
desc: "The structural constraint defining the expected types and fields of a record."
bonds: ["Rc", "Ds"]
why: "Row 0 (Data): type constraint. Control."
- id: 78
sym: "Vl"
name: "Volume"
block: M
row: 1
col: 17
year: null
desc: "The physical size or cardinality of the dataset (Bytes, Row Count)."
bonds: ["Ds"]
why: "Row 0 (Data): scale metric. Measure."
- id: 79
sym: "An"
name: "Analog ALU"
block: C
row: 7
col: 7
year: null
desc: "Continuous-voltage compute unit (e.g., memristor, optical) for extremely low-power inference."
bonds: ["Ma"]
why: "Row 6 (Hardware): non-digital compute. Compute."
- id: 80
sym: "En"
name: "Entropy"
block: M
row: 1
col: 18
year: "1948"
desc: "The Shannon information-theoretic limit; the absolute bound on data compressibility."
bonds: ["Vl"]
why: "Row 0 (Data): information limit. Measure."
- id: 81
sym: "Ix"
name: "Indexing"
block: R
row: 4
col: 3
year: null
desc: "The high-dimensional partitioning of vector space (e.g., HNSW) for sub-linear retrieval."
bonds: ["Tp"]
why: "Row 3 (Architecture): structured retrieval. Represent."
- id: 82
sym: "Ro"
name: "Routing"
block: K
row: 4
col: 14
year: null
desc: "The dynamic, data-dependent dispatch of tensors (e.g., Mixture of Experts)."
bonds: ["Gt"]
why: "Row 3 (Architecture): dynamic flow. Control."
- id: 83
sym: "Vr"
name: "Virtualization"
block: R
row: 6
col: 4
year: null
desc: "The abstraction of physical memory via page tables (e.g., PagedAttention) to solve fragmentation."
bonds: ["Cc"]
why: "Row 5 (Runtime): memory mapping. Represent."
- id: 84
sym: "Td"
name: "Thermodynamics"
block: M
row: 7
col: 18
year: null
desc: "The ultimate physical limitation (Landauer limit, thermal throttling) capping system scale."
bonds: ["Ew"]
why: "Row 6 (Hardware): thermal limit. Measure."
- id: 85
sym: "Rs"
name: "Resilience"
block: K
row: 8
col: 15
year: null
desc: "The systemic countermeasures (checkpointing, elastic recovery) for macroscopic hardware decay."
bonds: ["Oc"]
why: "Row 7 (Production): fault tolerance. Control."
- id: 86
sym: "Ac"
name: "Activation"
block: C
row: 3
col: 9
year: null
desc: "Non-linear functions (ReLU, GELU) providing expressive power."
bonds: ["Dd"]
why: "Row 2 (Algorithm): non-linear transform. Compute."
- id: 87
sym: "St"
name: "State"
block: R
row: 2
col: 3
year: null
desc: "The mathematical representation of an environment or context (RL, SSMs)."
bonds: ["Ob"]
why: "Row 1 (Math): contextual state. Represent."
- id: 88
sym: "Re"
name: "Retrieve"
block: X
row: 5
col: 12
year: null
desc: "Fetching stored state or external knowledge (e.g., from a KV Cache or Vector DB)."
bonds: ["Hs"]
why: "Row 4 (Optimization): state retrieval. Communicate."
- id: 89
sym: "Wa"
name: "Weight Avg"
block: C
row: 5
col: 7
year: null
desc: "Averaging model weights across time or distributed workers (e.g., SWA, EMA)."
bonds: ["Pm"]
why: "Row 4 (Optimization): parameter smoothing. Compute."
- id: 90
sym: "Ct"
name: "Critic"
block: K
row: 3
col: 16
year: null
desc: "The value function evaluating the expected return of a state (Actor-Critic RL)."
bonds: ["St", "Gd"]
why: "Row 2 (Algorithm): evaluative model. Control."
compounds:
# Compound section: end-to-end model structures, each written as a formula
# over element symbols.
- section: "Core Architectures"
  hint: "Fundamental end-to-end model structures"
  items:
  - name: "Transformer"
    formula: "Eb → [(At ∥ Mk) → Nm → Sk → Dd]ᴺ"
  - name: "Encoder-Decoder Transformer"
    formula: "Eb → [(At ∥ Mk) → Nm → Sk → Dd]ᴺ_enc → [(At ∥ Mk) → At_cross → Nm → Sk → Dd]ᴺ_dec → Dd"
  - name: "Vision Transformer (ViT)"
    formula: "Tk_patch → Eb → [(At ∥ Mk) → Nm → Sk → Dd]ᴺ → Dd"
  - name: "Multimodal (Whisper)"
    formula: "Tk_audio → Eb → [(At ∥ Mk) → Nm → Sk → Dd]ᴺ"
  - name: "CNN"
    formula: "[Cv → Ac → Po]ᴺ → Dd"
  - name: "ResNet"
    formula: "Eb → [Cv → Nm → Sk]ᴺ → Po → Dd"
  - name: "LSTM"
    formula: "Sp → (Dd ∥ Dd) → Gt → Fb(Hs) → Ac"
  - name: "State Space Model (SSM)"
    formula: "Dd → Ac → Fb(Hs)"
  - name: "Mamba (Selective SSM)"
    formula: "Dd → Gt → Fb(Hs)"
  - name: "GNN (Graph Neural Network)"
    formula: "Tp → At → Po → Dd"
# Compound section: reusable sub-blocks and training paradigms.
- section: "Structural & Training Patterns"
  hint: "Reusable sub-blocks and paradigms"
  items:
  - name: "Linear Attention"
    formula: "At → Fc → Fb(Hs)"
  - name: "Mixture of Experts (MoE)"
    formula: "Ro ? (Dd ∥ … ∥ Dd) → Gt"
  - name: "Multi-Head Attention"
    formula: "Dd → (At ∥ Mk) ∥ … ∥ (At ∥ Mk) → Dd"
  - name: "Batch Normalization"
    formula: "Bt → Nm"
  - name: "Contrastive Learning (CLIP)"
    formula: "(Tk_img ∥ Tk_txt) → (Eb ∥ Eb) → Dd → Ob_contrastive → Gd → Pm"
  - name: "Masked Autoencoder (MAE)"
    formula: "Mk → Eb → [(At ∥ Mk) → Nm → Sk → Dd]ᴺ → Ob"
# Compound section: generative and latent-variable models. This section
# carries no `hint:` key.
- section: "Generative & Latent Models"
  items:
  - name: "Diffusion Model"
    formula: "[St → Nm → (Dd → Ac → Sk)]ᴺ → Ob"
  - name: "Diffusion Transformer (DiT)"
    formula: "Tk → Eb → [(At ∥ Mk) → Nm → Sk → Dd]ᴺ → St"
  - name: "VAE"
    formula: "Eb → Dd → (Pr ∥ St) → Dd → Ob"
  - name: "GAN"
    formula: "(Dd → Ac) ⇌ (Dd → Ac) → Ob"
  - name: "World Model (JEPA/Sora)"
    formula: "Eb → [(At ∥ Mk) → Nm → Sk → Dd]ᴺ → Fb(Hs) → Ob"
  - name: "Sparse Autoencoder (SAE)"
    formula: "Hs → Dd → Ac → Sp → Dd → Ob"
# Compound section: efficiency and optimization techniques. This section
# carries no `hint:` key.
# NOTE(review): the first two formulas use `←` and `↔`, which the formula
# legend in this file does not define — confirm their intended meaning.
- section: "Efficiency & Optimization"
  items:
  - name: "Knowledge Distillation"
    formula: "Tp_teacher → Dv ← Tp_student → Gd"
  - name: "Systolic Array (TPU Core)"
    formula: "[Ma ↔ Ic]ᴺ"
  - name: "Flash Attention"
    formula: "At → (Ti ∥ Fs)"
  - name: "LoRA"
    formula: "Pm → Fc → Dd"
  - name: "Adam Optimizer"
    formula: "Gd → Os → Sc → Pm"
  - name: "Weight Averaging (SWA)"
    formula: "Gd → Wa → Pm"
  - name: "BitNet (1-bit LLM)"
    formula: "Qz → Dd → Ac"
  - name: "Quantization-Aware Training (QAT)"
    formula: "Qz → Gd → Pm"
  - name: "Speculative Decoding"
    formula: "St_draft → Rw → Bt"
  - name: "Neural Architecture Search (NAS)"
    formula: "Rw → Tp → Ob"
  - name: "Hyperparameter Optimization (HPO)"
    formula: "Rw → (Sc ∥ Rg) → Ob"
  - name: "DP-SGD (Differential Privacy)"
    formula: "(St ∥ Ct) → Gd → Pm"
# Compound section: alignment and fine-tuning recipes. This section carries
# no `hint:` key.
# NOTE(review): `Rw` is the Reward element (#18). If "RAFT" here means
# retrieval-augmented fine-tuning rather than a reward-ranked method, `Re`
# (Retrieve, #88) may be what was intended — confirm against the paper.
- section: "Alignment & Fine-Tuning"
  items:
  - name: "RLHF"
    formula: "St → Ob_reward → Gd → Pm"
  - name: "DPO"
    formula: "(St ∥ St) → Ob → Gd → Pm"
  - name: "PPO"
    formula: "St → (Ob ∥ Ct) → Gd → Pm"
  - name: "Chain-of-Thought (CoT)"
    formula: "St → Re(Hs) → Rw → Ob"
  - name: "RAFT"
    formula: "(Eb → Rw → Cc) → Gd → Pm"
  - name: "Prompt Tuning"
    formula: "Eb_prompt → [(At ∥ Mk) → Nm → Sk → Dd]ᴺ → Dd"
# Compound section: distributed-training and scaling strategies. This
# section carries no `hint:` key.
- section: "Distributed & Scaling"
  items:
  - name: "Data Parallelism (DP)"
    formula: "Bt → Gd → Sy → Pm"
  - name: "FSDP (Fully Sharded DP)"
    formula: "Bt → Fc → Gd → Sy → Pm"
  - name: "Pipeline Parallelism (PP)"
    formula: "Pl → Sy → Al"
  - name: "Tensor Parallelism (TP)"
    formula: "Fc → Sy → Al"
  - name: "Federated Learning"
    formula: "Gd → Wa → Sy"
  - name: "Model Merging / Ensembling"
    formula: "(Pm ∥ Pm) → Wa"
# Compound section: serving and production-system patterns. This section
# carries no `hint:` key.
- section: "System & Production"
  items:
  # RAG's second step is retrieval, so it references `Re` (Retrieve, #88,
  # "Fetching stored state or external knowledge ... Vector DB"). It was
  # previously `Rw`, which resolves to Reward (#18).
  # NOTE(review): confirm against the paper's formula.
  - name: "RAG"
    formula: "Eb → Re → Cc → (At ∥ Mk) → Dd"
  - name: "Inference Service"
    formula: "Rp → Ld → Ex → Cc"
  - name: "Feature Store"
    formula: "As → Cc → Rp"
  - name: "KV Cache"
    formula: "At → Cc"
  - name: "Gradient Checkpointing"
    formula: "Ad → Cp → Al"
# ─── Documented intentional symbol collisions ─────────────────
# Lookup behavior is last-wins; consumers may disambiguate by id
# or by (row, col). The validator allows only collisions listed here.
known_collisions:
# Each entry names a symbol shared by two element records (`ids`). The
# `note` restates each element's (row, col, block) as written in its
# `elements` record; update it whenever one of those elements moves.
- sym: "Sm"
  ids: [14, 77]
  note: "Schema (#77, row 1 col 14, K) + Sampling (#14, row 3 col 8, C)"
- sym: "En"
  ids: [36, 80]
  note: "Entropy (#80, row 1 col 18, M) + Ensembling (#36, row 5 col 11, X)"
- sym: "Sp"
  ids: [10, 34]
  note: "Sample (#10, row 3 col 3, R) + Sparsification (#34, row 5 col 6, C)"
- sym: "Ro"
  ids: [26, 82]
  note: "Routing (#26, row 4 col 8, C) + Routing (#82, row 4 col 14, K)"