% Conference tutorial slide deck: "Quantitative ML Systems".
% Built with beamer (16:9, 10pt) using the metropolis theme.
%
% NOTE: every comment lives on its own line. A `%' placed mid-line comments
% out everything after it on that physical line, which would swallow the
% following commands (including \begin{document}).
\documentclass[aspectratio=169, 10pt]{beamer}

% Theme and styling to match MLSysBook aesthetic
\usetheme{metropolis}
\definecolor{HarvardCrimson}{RGB}{165, 28, 48}
\definecolor{DarkGray}{RGB}{51, 51, 51}
\setbeamercolor{frametitle}{bg=DarkGray, fg=white}
\setbeamercolor{palette primary}{bg=HarvardCrimson, fg=white}

% Product name, typeset in one place so the styling can be changed once.
% Follow with `{}' in running text to keep the trailing space.
\newcommand*{\mlsysim}{MLSys$\cdot$im}

\title{Quantitative ML Systems}
\subtitle{From the Iron Law of Performance to Agentic Infrastructure Design}
\author{Machine Learning Systems Textbook Team}
\date{Conference Tutorial}

\begin{document}

\maketitle

% --- Section 1: The Problem ---
\section{The Reasoning Gap}

\begin{frame}{The Crisis in Systems Research}
  \textbf{The Problem:} Systems are getting more complex, but the tools to think about them have not kept pace.
  \vspace{0.5cm}
  \begin{itemize}
    \item \textbf{Cycle-Accurate Simulators:} Require hours to compile, weeks to run a single LLaMA-70B epoch.
    \item \textbf{Spreadsheet Math:} Error-prone, silent unit conversions (GB vs GiB, MACs vs FLOPs), impossible to share or version-control.
    \item \textbf{The Result:} The ``Reasoning Gap''. A student cannot requisition a 10,000-GPU cluster to test how a Ring-AllReduce topology affects latency.
  \end{itemize}
\end{frame}

\begin{frame}{The Solution: \mlsysim}
  Taking inspiration from Hennessy \& Patterson's MIPS simulator: we sacrifice cycle accuracy for \textbf{taxonomic completeness} and \textbf{execution speed}.
  \vspace{0.5cm}
  \begin{block}{What is \mlsysim?}
    A pure-Python, dimensionally-strict analytical framework that evaluates the physics of ML workloads from single-node SRAM to 100,000-node datacenters in \emph{milliseconds}.
  \end{block}
\end{frame}

% --- Section 2: The Core Physics ---
\section{The Iron Law \& The 22 Walls}

\begin{frame}{The Iron Law of ML Performance}
  \centering
  % \Large enlarges only the displayed equation; \normalsize restores the
  % body size before the bullet list.
  \Large
  \[
    T = \max\left(
      \frac{\text{OPs}}{\text{Peak}_{\text{FLOPS}} \times \eta},
      \frac{\text{Bytes}}{\text{BW}_{\text{HBM}}}
    \right) + \text{Overhead}
  \]
  \vspace{0.5cm}
  \normalsize
  \begin{itemize}
    \item \textbf{The Memory Wall:} Why a $3.2\times$ FLOPS upgrade (A100 $\rightarrow$ H100) often only yields a $1.7\times$ speedup for LLMs.
    \item \textbf{The Efficiency ($\eta$):} Absorbs micro-architectural chaos into a single verifiable parameter.
  \end{itemize}
\end{frame}

\begin{frame}{The 5-Layer Stack}
  We strictly decouple \emph{Demand} from \emph{Supply}.
  \vspace{0.3cm}
  \begin{enumerate}
    \item \textbf{Workloads (Demand):} FLOPs, parameters, KV-cache needs.
    \item \textbf{Hardware (Supply):} Silicon specs (Peak FLOPS, Bandwidth).
    \item \textbf{Infrastructure (Environment):} Grid Carbon, PUE, WUE.
    \item \textbf{Systems (Topology):} Fleet composition, Network fabrics.
    \item \textbf{Solvers (Analysis):} The engines that evaluate the 22 physical walls.
  \end{enumerate}
\end{frame}

% --- Section 3: Live Examples ---
\section{Live Physics: From Node to Fleet}

\begin{frame}{Example 1: The \$9 Million Question}
  \textbf{Scenario:} Adding Chain-of-Thought ($K=8$) reasoning to production.
  \vspace{0.3cm}
  \emph{We use \mlsysim{} to prove:}
  \begin{itemize}
    \item CoT scales at the memory-bound \emph{decode} rate (ITL).
    \item A $7\times$ latency increase requires $7\times$ more GPUs to maintain QPS.
    \item An algorithmic tweak turns a \$1.2M server bill into a \$9.1M capital expenditure.
  \end{itemize}
\end{frame}

% --- Section 4: The Agentic Future ---
\section{The Climax: Agentic Infrastructure Design}

\begin{frame}{\mlsysim{} as an Agent Protocol (MCP)}
  \textbf{Vision:} AI designing AI infrastructure.
  \vspace{0.3cm}
  LLMs are terrible at math, but excellent at calling strictly-typed tools.
  \begin{itemize}
    \item \textbf{Bring Your Own YAML (BYOY):} Define chips declaratively.
    \item \textbf{Model Context Protocol (MCP):} \mlsysim{} provides a native MCP server exposing its JSON schema.
    \item \textbf{The Agentic Loop:} Claude generates a cluster YAML $\rightarrow$ \mlsysim{} evaluates it $\rightarrow$ returns physical bottleneck (e.g., OOM at batch 256) $\rightarrow$ Claude autonomously adjusts node count.
  \end{itemize}
\end{frame}

\begin{frame}{Summary \& Call to Action}
  \begin{center}
    \Large \textbf{Be the Standard, Not the Bottleneck.}
  \end{center}
  \vspace{0.5cm}
  \begin{itemize}
    \item \texttt{pip install mlsysim}
    \item Don't write a custom simulator for your next ISCA paper. Write an \texttt{mlsysim} plugin!
    \item Connect it to your AI agents today.
  \end{itemize}
  \vspace{0.5cm}
  \centering
  % \href comes from hyperref, which beamer loads itself; the visible text is
  % unchanged, the link target assumes the https scheme.
  \href{https://github.com/harvard-edge/cs249r_book}{\textbf{github.com/harvard-edge/cs249r\_book}}
\end{frame}

\end{document}