% Tutorial slides for the MLSys$\cdot$im 0.1.1 release.
% Mirror of https://github.com/harvard-edge/cs249r_book.git
\documentclass[aspectratio=169, 10pt]{beamer}

% Theme and styling to match MLSysBook aesthetic
\usetheme{metropolis}
\definecolor{HarvardCrimson}{RGB}{165, 28, 48}
\definecolor{DarkGray}{RGB}{51, 51, 51}
\setbeamercolor{frametitle}{bg=DarkGray, fg=white}
\setbeamercolor{palette primary}{bg=HarvardCrimson, fg=white}

\title{Quantitative ML Systems}
\subtitle{From the Iron Law of Performance to Agentic Infrastructure Design}
\author{Machine Learning Systems Textbook Team}
\date{Conference Tutorial}

\begin{document}
\maketitle

% --- Section 1: The Problem ---
\section{The Reasoning Gap}

\begin{frame}{The Crisis in Systems Research}
\textbf{The Problem:} Systems are getting more complex, but the tools to think about them have not kept pace.

\vspace{0.5cm}

\begin{itemize}
  \item \textbf{Cycle-Accurate Simulators:} Require hours to compile, weeks to run a single LLaMA-70B epoch.
  \item \textbf{Spreadsheet Math:} Error-prone, silent unit conversions (GB vs.\ GiB, MACs vs.\ FLOPs), impossible to share or version-control.
  \item \textbf{The Result:} The ``Reasoning Gap''. A student cannot requisition a 10,000-GPU cluster to test how a Ring-AllReduce topology affects latency.
\end{itemize}
\end{frame}
\begin{frame}{The Solution: MLSys$\cdot$im}
Taking inspiration from Hennessy \& Patterson's MIPS simulator: we sacrifice cycle accuracy for \textbf{taxonomic completeness} and \textbf{execution speed}.

\vspace{0.5cm}

\begin{block}{What is MLSys$\cdot$im?}
  A pure-Python, dimensionally-strict analytical framework that evaluates the physics of ML workloads from single-node SRAM to 100,000-node datacenters in \emph{milliseconds}.
\end{block}
\end{frame}
% --- Section 2: The Core Physics ---
\section{The Iron Law \& The 22 Walls}

\begin{frame}{The Iron Law of ML Performance}
\centering
\Large
\[
  T = \max\left( \frac{\text{OPs}}{\text{Peak}_{\text{FLOPS}} \times \eta},
                 \frac{\text{Bytes}}{\mathit{BW}_{\text{HBM}}} \right) + \text{Overhead}
\]

\vspace{0.5cm}

\normalsize
\begin{itemize}
  \item \textbf{The Memory Wall:} Why a 3.2$\times$ FLOPS upgrade (A100 $\rightarrow$ H100) often only yields a 1.7$\times$ speedup for LLMs.
  \item \textbf{The Efficiency ($\eta$):} Absorbs micro-architectural chaos into a single verifiable parameter.
\end{itemize}
\end{frame}
\begin{frame}{The 5-Layer Stack}
We strictly decouple \emph{Demand} from \emph{Supply}.

\vspace{0.3cm}

\begin{enumerate}
  \item \textbf{Workloads (Demand):} FLOPs, parameters, KV-cache needs.
  \item \textbf{Hardware (Supply):} Silicon specs (Peak FLOPS, Bandwidth).
  \item \textbf{Infrastructure (Environment):} Grid Carbon, PUE, WUE.
  \item \textbf{Systems (Topology):} Fleet composition, Network fabrics.
  \item \textbf{Solvers (Analysis):} The engines that evaluate the 22 physical walls.
\end{enumerate}
\end{frame}
% --- Section 3: Live Examples ---
\section{Live Physics: From Node to Fleet}

\begin{frame}{Example 1: The \$9 Million Question}
\textbf{Scenario:} Adding Chain-of-Thought ($K=8$) reasoning to production.

\vspace{0.3cm}

\textit{We use MLSys$\cdot$im to prove:}
\begin{itemize}
  \item CoT scales at the memory-bound \emph{decode} rate (ITL).
  \item A 7$\times$ latency increase requires 7$\times$ more GPUs to maintain QPS.
  \item An algorithmic tweak turns a \$1.2M server bill into a \$9.1M capital expenditure.
\end{itemize}
\end{frame}
% --- Section 4: The Agentic Future ---
\section{The Climax: Agentic Infrastructure Design}

\begin{frame}{MLSys$\cdot$im as an Agent Protocol (MCP)}
\textbf{Vision:} AI designing AI infrastructure.

\vspace{0.3cm}

LLMs are terrible at math, but excellent at calling strictly-typed tools.
\begin{itemize}
  \item \textbf{Bring Your Own YAML (BYOY):} Define chips declaratively.
  \item \textbf{Model Context Protocol (MCP):} MLSys$\cdot$im provides a native MCP server exposing its JSON schema.
  \item \textbf{The Agentic Loop:} Claude generates a cluster YAML $\rightarrow$ MLSys$\cdot$im evaluates it $\rightarrow$ returns physical bottleneck (e.g., OOM at batch 256) $\rightarrow$ Claude autonomously adjusts node count.
\end{itemize}
\end{frame}
\begin{frame}{Summary \& Call to Action}
\begin{center}
  \Large \textbf{Be the Standard, Not the Bottleneck.}
\end{center}

\vspace{0.5cm}

\begin{itemize}
  \item \texttt{pip install mlsysim}
  \item Don't write a custom simulator for your next ISCA paper. Write an \texttt{mlsysim} plugin!
  \item Connect it to your AI agents today.
\end{itemize}

\vspace{0.5cm}

\centering
% hyperref is loaded automatically by beamer, so \url is available.
\textbf{\url{https://github.com/harvard-edge/cs249r_book}}
\end{frame}

\end{document}