mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-29 09:08:54 -05:00
Adds PDF config for Volume II of the book
Creates a YAML configuration file specifically for generating the PDF version of Volume II: Machine Learning Systems at Scale. This configuration defines the project structure, book metadata (title, author, abstract), chapter organization, and PDF-specific settings like cover page design, table of contents depth, and inclusion of LaTeX files for custom styling. With this file in place, the PDF output for Volume II can be built and customized independently.
This commit is contained in:
@@ -1,297 +1,187 @@
|
||||
# =============================================================================
|
||||
# VOLUME II HTML WEBSITE CONFIGURATION
|
||||
# VOLUME II PDF CONFIGURATION
|
||||
# =============================================================================
|
||||
# Builds only Volume II: Machine Learning Systems at Scale
|
||||
# Deploys to: mlsysbook.ai/vol2/
|
||||
#
|
||||
# Usage:
|
||||
# cp config/_quarto-pdf-vol2.yml _quarto.yml
|
||||
# quarto render --to titlepage-pdf
|
||||
# =============================================================================
|
||||
|
||||
project:
|
||||
type: website
|
||||
output-dir: _build/html-vol2
|
||||
execute-dir: project
|
||||
|
||||
post-render:
|
||||
- scripts/clean_svgs.py
|
||||
- scripts/fix_cross_references.py
|
||||
|
||||
preview:
|
||||
browser: false
|
||||
navigate: false
|
||||
|
||||
website:
|
||||
title: "Machine Learning Systems at Scale"
|
||||
description: "Volume II: Machine Learning Systems at Scale. Scale, distribute, and govern machine learning systems in production."
|
||||
site-url: https://mlsysbook.ai/vol2/
|
||||
|
||||
open-graph:
|
||||
locale: en_US
|
||||
site-name: "Machine Learning Systems"
|
||||
image: assets/images/covers/cover-hardcover-book.png
|
||||
image-width: 1200
|
||||
image-height: 630
|
||||
twitter-card:
|
||||
card-style: summary_large_image
|
||||
image: assets/images/covers/cover-hardcover-book.png
|
||||
|
||||
page-navigation: true
|
||||
reader-mode: false
|
||||
back-to-top-navigation: true
|
||||
bread-crumbs: true
|
||||
|
||||
search:
|
||||
keyboard-shortcut: ["/"]
|
||||
|
||||
google-analytics:
|
||||
tracking-id: "G-M21L0CBCVN"
|
||||
anonymize-ip: true
|
||||
version: 4
|
||||
|
||||
comments:
|
||||
hypothesis:
|
||||
theme: clean
|
||||
openSidebar: false
|
||||
|
||||
navbar:
|
||||
background: light
|
||||
logo: "assets/images/icons/favicon.png"
|
||||
search: true
|
||||
pinned: true
|
||||
collapse: true
|
||||
collapse-below: "lg"
|
||||
title: "Machine Learning Systems at Scale"
|
||||
left:
|
||||
- text: "Textbook"
|
||||
menu:
|
||||
- icon: book-half
|
||||
text: "Full Textbook"
|
||||
href: ../book/
|
||||
- text: "---"
|
||||
- icon: journal
|
||||
text: "Volume I: Foundations"
|
||||
href: ../vol1/
|
||||
- icon: journal
|
||||
text: "Volume II: Advanced"
|
||||
href: ./
|
||||
- text: "---"
|
||||
- icon: fire
|
||||
text: "TinyTorch"
|
||||
href: ../tinytorch/
|
||||
- icon: cpu
|
||||
text: "Hardware Kits"
|
||||
href: ../kits/
|
||||
- text: "---"
|
||||
- icon: lightbulb
|
||||
text: "Labs (Coming 2026)"
|
||||
href: ../labs/
|
||||
right:
|
||||
- icon: download
|
||||
text: "Downloads"
|
||||
menu:
|
||||
- icon: file-pdf
|
||||
text: "Volume II PDF"
|
||||
href: assets/downloads/Machine-Learning-Systems-Vol2.pdf
|
||||
target: _blank
|
||||
- icon: journal-text
|
||||
text: "Volume II EPUB"
|
||||
href: assets/downloads/Machine-Learning-Systems-Vol2.epub
|
||||
target: _blank
|
||||
- text: "---"
|
||||
- icon: file-pdf
|
||||
text: "Volume I PDF"
|
||||
href: ../vol1/assets/downloads/Machine-Learning-Systems-Vol1.pdf
|
||||
target: _blank
|
||||
- icon: star
|
||||
text: "Star"
|
||||
href: https://github.com/harvard-edge/cs249r_book#support-this-work
|
||||
target: _blank
|
||||
- icon: heart
|
||||
text: "Support"
|
||||
href: https://opencollective.com/mlsysbook
|
||||
target: _blank
|
||||
- icon: envelope
|
||||
text: "Subscribe"
|
||||
href: "#subscribe"
|
||||
id: "navbar-subscribe-btn"
|
||||
- icon: github
|
||||
text: "GitHub"
|
||||
menu:
|
||||
- icon: pencil
|
||||
text: "Edit this page"
|
||||
href: https://github.com/harvard-edge/cs249r_book
|
||||
target: _blank
|
||||
- icon: bug
|
||||
text: "Report an issue"
|
||||
href: https://github.com/harvard-edge/cs249r_book/issues/new
|
||||
target: _blank
|
||||
- icon: chat
|
||||
text: "Discussions"
|
||||
href: https://github.com/harvard-edge/cs249r_book/discussions
|
||||
target: _blank
|
||||
- icon: code
|
||||
text: "View source"
|
||||
href: https://github.com/harvard-edge/cs249r_book
|
||||
target: _blank
|
||||
output-dir: _build/pdf-vol2
|
||||
|
||||
book:
|
||||
favicon: assets/images/icons/favicon.png
|
||||
cover-image: assets/images/covers/cover-hardcover-book-vol2.png
|
||||
cover-image-alt: "Cover image."
|
||||
|
||||
sidebar:
|
||||
- id: vol2-content
|
||||
title: "Volume II"
|
||||
style: "floating"
|
||||
background: light
|
||||
collapse-level: 2
|
||||
contents:
|
||||
# Frontmatter
|
||||
- text: "Homepage"
|
||||
href: contents/vol2/index.qmd
|
||||
- href: contents/vol2/frontmatter/dedication.qmd
|
||||
- href: contents/vol2/frontmatter/foreword.qmd
|
||||
- href: contents/vol2/frontmatter/about.qmd
|
||||
- href: contents/vol2/frontmatter/acknowledgements.qmd
|
||||
- href: contents/vol2/frontmatter/notation.qmd
|
||||
- href: contents/frontmatter/socratiq/socratiq.qmd
|
||||
title: "Machine Learning Systems at Scale"
|
||||
|
||||
- text: "---"
|
||||
date: today
|
||||
date-format: long
|
||||
|
||||
# Part I: The Fleet
|
||||
- section: "Part I: The Fleet"
|
||||
id: vol2-fleet
|
||||
contents:
|
||||
- text: "Introduction"
|
||||
href: contents/vol2/introduction/introduction.qmd
|
||||
- href: contents/vol2/compute_infrastructure/compute_infrastructure.qmd
|
||||
- href: contents/vol2/network_fabrics/network_fabrics.qmd
|
||||
- href: contents/vol2/data_storage/data_storage.qmd
|
||||
author:
|
||||
name: Vijay Janapa Reddi
|
||||
email: vj@eecs.harvard.edu
|
||||
url: https://www.google.com/search?q=Vijay+Janapa+Reddi
|
||||
affiliations: Harvard University
|
||||
corresponding: true
|
||||
roles: "Author, editor and curator."
|
||||
|
||||
# Part II: Distributed ML
|
||||
- section: "Part II: Distributed ML"
|
||||
id: vol2-distributed
|
||||
contents:
|
||||
- href: contents/vol2/distributed_training/distributed_training.qmd
|
||||
- href: contents/vol2/collective_communication/collective_communication.qmd
|
||||
- href: contents/vol2/fault_tolerance/fault_tolerance.qmd
|
||||
- href: contents/vol2/fleet_orchestration/fleet_orchestration.qmd
|
||||
abstract: |
|
||||
Scale changes machine learning infrastructure in kind, not merely in degree. When training requires coordination across many machines, communication between nodes dominates compute time; hardware failures become statistically routine; and system behavior emerges from interactions that no single component determines. The fleet — the coordinated ensemble of compute, network, and storage spanning many machines — becomes the fundamental unit of analysis, requiring principles and architectures that single-machine reasoning cannot supply.
|
||||
|
||||
# Part III: Deployment at Scale
|
||||
- section: "Part III: Deployment at Scale"
|
||||
id: vol2-deployment
|
||||
contents:
|
||||
- href: contents/vol2/performance_engineering/performance_engineering.qmd
|
||||
- href: contents/vol2/inference/inference.qmd
|
||||
- href: contents/vol2/edge_intelligence/edge_intelligence.qmd
|
||||
- href: contents/vol2/ops_scale/ops_scale.qmd
|
||||
Four parts examine these principles. Part I establishes the physical infrastructure of large-scale ML: datacenter architecture and cooling constraints, the network fabrics that interconnect machines, and the distributed storage systems that feed training and serving pipelines. Part II examines distributed ML: parallelism strategies across nodes, collective communication algorithms, fault tolerance through checkpointing and elastic training, and fleet orchestration using cluster schedulers. Part III addresses deployment at scale: distributed inference and large-model serving, performance engineering, edge intelligence and federated learning, and production operations. Part IV addresses governance: security and privacy, adversarial robustness, sustainable computing under energy and carbon constraints, and responsible AI at fleet scale.
|
||||
|
||||
# Part IV: The Responsible Fleet
|
||||
- section: "Part IV: The Responsible Fleet"
|
||||
id: vol2-responsible
|
||||
contents:
|
||||
- href: contents/vol2/security_privacy/security_privacy.qmd
|
||||
- href: contents/vol2/robust_ai/robust_ai.qmd
|
||||
- href: contents/vol2/sustainable_ai/sustainable_ai.qmd
|
||||
- href: contents/vol2/responsible_ai/responsible_ai.qmd
|
||||
- href: contents/vol2/conclusion/conclusion.qmd
|
||||
|
||||
- text: "---"
|
||||
|
||||
# Appendices
|
||||
- section: "Appendices"
|
||||
id: vol2-appendices
|
||||
contents:
|
||||
- text: "Fleet Foundations"
|
||||
href: contents/vol2/backmatter/appendix_fleet.qmd
|
||||
- text: "Communication Foundations"
|
||||
href: contents/vol2/backmatter/appendix_communication.qmd
|
||||
- text: "Reliability Foundations"
|
||||
href: contents/vol2/backmatter/appendix_reliability.qmd
|
||||
- text: "The C³ Taxonomy"
|
||||
href: contents/vol2/backmatter/appendix_c3.qmd
|
||||
- text: "System Assumptions"
|
||||
href: contents/vol2/backmatter/appendix_assumptions.qmd
|
||||
- text: "Glossary"
|
||||
href: contents/vol2/backmatter/glossary/glossary.qmd
|
||||
Throughout, physical constraints drive architecture. Bisection bandwidth, MTBF analysis, queuing theory, and scaling efficiency metrics are the analytical instruments. Readers develop the capacity to design distributed training pipelines, reason quantitatively about fault tolerance and scaling efficiency, and govern production ML infrastructure with accountability and environmental awareness. Suitable for graduate students in distributed systems and machine learning systems, and for practitioners designing and operating ML infrastructure at production scale.
|
||||
|
||||
repo-url: https://github.com/harvard-edge/cs249r_book
|
||||
|
||||
page-footer:
|
||||
left: |
|
||||
© 2024-2025 Harvard University. Licensed under <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/">CC-BY-NC-SA 4.0</a>
|
||||
center: |
|
||||
<a href="../vol1/">← Back to Volume I</a>
|
||||
right:
|
||||
- icon: github
|
||||
href: https://github.com/harvard-edge/cs249r_book
|
||||
aria-label: "View source on GitHub"
|
||||
- icon: star
|
||||
href: https://github.com/harvard-edge/cs249r_book
|
||||
aria-label: "Star this repository"
|
||||
background: light
|
||||
border: true
|
||||
Written, edited and curated by Prof. Vijay Janapa Reddi (Harvard University)
|
||||
right: |
|
||||
Built with <a href="https://quarto.org/">Quarto</a>.
|
||||
|
||||
chapters:
|
||||
- index.qmd
|
||||
|
||||
# ==================================================
|
||||
# Volume II Frontmatter
|
||||
# ==================================================
|
||||
# - contents/vol2/frontmatter/dedication.qmd
|
||||
# - contents/vol2/frontmatter/foreword.qmd
|
||||
# - contents/vol2/frontmatter/about.qmd
|
||||
# - contents/vol2/frontmatter/acknowledgements.qmd
|
||||
# - contents/vol2/frontmatter/notation.qmd
|
||||
|
||||
# ==================================================
|
||||
# Part I: The Fleet
|
||||
# ==================================================
|
||||
# - contents/vol2/parts/fleet_principles.qmd
|
||||
- contents/vol2/introduction/introduction.qmd
|
||||
# - contents/vol2/compute_infrastructure/compute_infrastructure.qmd
|
||||
# - contents/vol2/network_fabrics/network_fabrics.qmd
|
||||
# - contents/vol2/data_storage/data_storage.qmd
|
||||
|
||||
# ==================================================
|
||||
# Part II: Distributed ML
|
||||
# ==================================================
|
||||
# - contents/vol2/parts/distributed_ml_principles.qmd
|
||||
# - contents/vol2/distributed_training/distributed_training.qmd
|
||||
# - contents/vol2/collective_communication/collective_communication.qmd
|
||||
# - contents/vol2/fault_tolerance/fault_tolerance.qmd
|
||||
# - contents/vol2/fleet_orchestration/fleet_orchestration.qmd
|
||||
|
||||
# ==================================================
|
||||
# Part III: Deployment at Scale
|
||||
# ==================================================
|
||||
# - contents/vol2/parts/deployment_principles.qmd
|
||||
# - contents/vol2/performance_engineering/performance_engineering.qmd
|
||||
# - contents/vol2/inference/inference.qmd
|
||||
# - contents/vol2/edge_intelligence/edge_intelligence.qmd
|
||||
# - contents/vol2/ops_scale/ops_scale.qmd
|
||||
|
||||
# ==================================================
|
||||
# Part IV: The Responsible Fleet
|
||||
# ==================================================
|
||||
# - contents/vol2/parts/responsible_fleet_principles.qmd
|
||||
# - contents/vol2/security_privacy/security_privacy.qmd
|
||||
# - contents/vol2/robust_ai/robust_ai.qmd
|
||||
# - contents/vol2/sustainable_ai/sustainable_ai.qmd
|
||||
# - contents/vol2/responsible_ai/responsible_ai.qmd
|
||||
# - contents/vol2/conclusion/conclusion.qmd
|
||||
# - contents/vol2/backmatter/references.qmd
|
||||
|
||||
# appendices:
|
||||
# - contents/vol2/backmatter/appendix_fleet.qmd
|
||||
# - contents/vol2/backmatter/appendix_communication.qmd
|
||||
# - contents/vol2/backmatter/appendix_reliability.qmd
|
||||
# - contents/vol2/backmatter/appendix_c3.qmd
|
||||
# - contents/vol2/backmatter/appendix_assumptions.qmd
|
||||
# - contents/vol2/backmatter/glossary/glossary.qmd
|
||||
|
||||
citation: true
|
||||
license: CC-BY-NC-SA
|
||||
|
||||
bibliography:
|
||||
- contents/vol2/backmatter/references.bib
|
||||
|
||||
format:
|
||||
html:
|
||||
lightbox: true
|
||||
mermaid:
|
||||
theme: default
|
||||
theme:
|
||||
light:
|
||||
- default
|
||||
- assets/styles/style-vol2.scss
|
||||
dark:
|
||||
- default
|
||||
- assets/styles/style-vol2.scss
|
||||
- assets/styles/dark-mode-vol2.scss
|
||||
respect-user-color-scheme: true
|
||||
title-prefix: ""
|
||||
pagetitle: "Machine Learning Systems at Scale"
|
||||
code-block-bg: true
|
||||
code-copy: true
|
||||
code-overflow: wrap
|
||||
language:
|
||||
title-block-author-single: "Author, Editor & Curator"
|
||||
title-block-published: "Last Updated"
|
||||
reference-location: margin
|
||||
citation-location: margin
|
||||
sidenote: true
|
||||
highlight-style: assets/styles/custom-code.theme
|
||||
code-link: true
|
||||
link-external-icon: false
|
||||
link-external-newwindow: true
|
||||
anchor-sections: true
|
||||
smooth-scroll: false
|
||||
citations-hover: false
|
||||
footnotes-hover: false
|
||||
fig-width: 7
|
||||
fig-height: 5
|
||||
toc: true
|
||||
toc-depth: 4
|
||||
toc-expand: true
|
||||
toc-title: "On this page"
|
||||
number-sections: false
|
||||
number-depth: 3
|
||||
include-in-header:
|
||||
text: |
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
|
||||
<link rel="manifest" href="/site.webmanifest">
|
||||
<link rel="apple-touch-icon" href="/assets/images/icons/favicon.png">
|
||||
<meta name="theme-color" content="#1F407A">
|
||||
<script type="module" src="/tools/scripts/socratiQ/bundle.js" defer></script>
|
||||
<script src="/assets/scripts/sidebar-auto-collapse.js" defer></script>
|
||||
<script src="/assets/scripts/version-link.js" defer></script>
|
||||
<script src="/assets/scripts/subscribe-modal.js" defer></script>
|
||||
citeproc: true
|
||||
|
||||
metadata-files:
|
||||
- config/shared/base/crossref-video.yml
|
||||
- config/shared/base/custom-numbered-blocks.yml
|
||||
- config/shared/base/execute-env.yml
|
||||
- config/shared/base/diagram.yml
|
||||
- config/shared/html/filters.yml
|
||||
- config/shared/html/filter-metadata.yml
|
||||
- config/shared/pdf/filters.yml
|
||||
- config/shared/pdf/filter-metadata.yml
|
||||
- config/shared/vol2/filter-metadata-paths.yml
|
||||
- config/shared/html/announcement.yml
|
||||
- config/shared/pdf/custom-numbered-blocks-overrides.yml
|
||||
- config/shared/pdf/titlepage-theme-common.yml
|
||||
- config/shared/pdf/titlepage-pdf-common.yml
|
||||
- config/shared/pdf/build-production-common.yml
|
||||
|
||||
format:
|
||||
titlepage-pdf:
|
||||
coverpage: true
|
||||
coverpage-title: "Machine Learning Systems"
|
||||
coverpage-bg-image: "assets/images/covers/cover-image-transparent-vol2.png"
|
||||
coverpage-author: ["Vijay Janapa Reddi"]
|
||||
coverpage-footer: "At Scale"
|
||||
coverpage-theme:
|
||||
page-text-align: "left"
|
||||
bg-image-left: "0.225\\paperwidth"
|
||||
bg-image-bottom: 9
|
||||
bg-image-rotate: 0
|
||||
bg-image-opacity: 1.0
|
||||
header-style: "none"
|
||||
date-style: "none"
|
||||
|
||||
footer-fontsize: 25
|
||||
footer-left: "0.075\\paperwidth"
|
||||
footer-bottom: "0.475\\paperwidth"
|
||||
footer-width: "0.9\\paperwidth"
|
||||
footer-align: "left"
|
||||
|
||||
title-fontsize: 52
|
||||
title-left: "0.075\\paperwidth"
|
||||
title-bottom: "0.4\\paperwidth"
|
||||
title-width: "0.9\\paperwidth"
|
||||
|
||||
author-style: "plain"
|
||||
author-sep: "newline"
|
||||
author-fontsize: 20
|
||||
author-align: "right"
|
||||
author-bottom: "0.225\\paperwidth"
|
||||
#author-left: "0.075\\paperwidth"
|
||||
author-left: ".925\\paperwidth"
|
||||
author-width: 6in
|
||||
|
||||
titlepage: true
|
||||
titlepage-theme:
|
||||
elements: [ "\\titleblock",
|
||||
"Prof. Vijay Janapa Reddi",
|
||||
"School of Engineering and Applied Sciences",
|
||||
"Harvard University",
|
||||
"\\vspace{80mm}",
|
||||
"With heartfelt gratitude to the community for their invaluable contributions and steadfast support.",
|
||||
"\\vfill",
|
||||
"{{< meta date >}}",
|
||||
"\\vfill"]
|
||||
|
||||
lof: true
|
||||
lot: true
|
||||
lol: true
|
||||
toc-depth: 3
|
||||
number-depth: 3
|
||||
keep-tex: true
|
||||
reference-location: margin
|
||||
citation-location: margin
|
||||
include-in-header:
|
||||
- file: "tex/theme-colors-vol2.tex"
|
||||
- file: "tex/header-includes.tex"
|
||||
- text: |
|
||||
\usepackage{needspace}
|
||||
\let\Needspace\needspace
|
||||
include-before-body:
|
||||
- file: "tex/before-body-includes-vol2.tex"
|
||||
|
||||
Reference in New Issue
Block a user