mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-05-24 10:35:49 -05:00
1481 lines
87 KiB
HTML
1481 lines
87 KiB
HTML
|
||
<!DOCTYPE html>
|
||
|
||
|
||
<html lang="en" data-content_root="../" >
|
||
|
||
<head>
|
||
<meta charset="utf-8" />
|
||
<!-- Single viewport declaration; the generator emitted the same tag twice on this line. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
|
||
<title>15. Quantization - Reduced Precision for Efficiency — Tiny🔥Torch</title>
|
||
|
||
|
||
|
||
<script data-cfasync="false">
|
||
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
|
||
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
|
||
</script>
|
||
|
||
<!-- Loaded before other Sphinx assets -->
|
||
<link href="../_static/styles/theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
<link href="../_static/styles/bootstrap.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
<link href="../_static/styles/pydata-sphinx-theme.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
|
||
|
||
<link href="../_static/vendor/fontawesome/6.5.2/css/all.min.css?digest=dfe6caa3a7d634c4db9b" rel="stylesheet" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-solid-900.woff2" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-brands-400.woff2" />
|
||
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.5.2/webfonts/fa-regular-400.woff2" />
|
||
|
||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=03e43079" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/styles/sphinx-book-theme.css?v=eba8b062" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/togglebutton.css?v=13237357" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/mystnb.8ecb98da25f57f5357bf6f572d296f466b2cfe2517ffebfabe82451661e28f02.css" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/sphinx-thebe.css?v=4fa983c6" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/sphinx-design.min.css?v=95c83b7e" />
|
||
<link rel="stylesheet" type="text/css" href="../_static/custom.css?v=afcf7c3c" />
|
||
|
||
<!-- Pre-loaded scripts that we'll load fully later -->
|
||
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
|
||
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
|
||
<script src="../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||
|
||
<script src="../_static/documentation_options.js?v=9eb32ce0"></script>
|
||
<script src="../_static/doctools.js?v=9a2dae69"></script>
|
||
<script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>
|
||
<script src="../_static/copybutton.js?v=f281be69"></script>
|
||
<script src="../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
|
||
<script>let toggleHintShow = 'Click to show';</script>
|
||
<script>let toggleHintHide = 'Click to hide';</script>
|
||
<script>let toggleOpenOnPrint = 'true';</script>
|
||
<script src="../_static/togglebutton.js?v=4a39c7ea"></script>
|
||
<script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
|
||
<script src="../_static/design-tabs.js?v=f930bc37"></script>
|
||
<script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"; const thebe_selector = ".thebe,.cell"; const thebe_selector_input = "pre"; const thebe_selector_output = ".output, .cell_output"</script>
|
||
<script async="async" src="../_static/sphinx-thebe.js?v=c100c467"></script>
|
||
<script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
|
||
<!-- Duplicate thebe configuration script removed: THEBE_JS_URL, thebe_selector, thebe_selector_input and thebe_selector_output are already declared with const by an earlier classic script on this page, and redeclaring a top-level const in a second classic script raises a SyntaxError. -->
|
||
<script type="module" src="https://cdn.jsdelivr.net/npm/mermaid@10.6.1/dist/mermaid.esm.min.mjs"></script>
|
||
<script type="module" src="https://cdn.jsdelivr.net/npm/@mermaid-js/layout-elk@0.1.4/dist/mermaid-layout-elk.esm.min.mjs"></script>
|
||
<script type="module">
  import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@10.6.1/dist/mermaid.esm.min.mjs";
  import elkLayouts from "https://cdn.jsdelivr.net/npm/@mermaid-js/layout-elk@0.1.4/dist/mermaid-layout-elk.esm.min.mjs";

  // Register the ELK layout engine only when the loaded Mermaid build exposes
  // the registration hook. NOTE(review): registerLayoutLoaders is believed to
  // exist only in Mermaid v11+, while this page pins 10.6.1 — confirm. Without
  // the guard a missing method would throw and skip initialize() below.
  if (typeof mermaid.registerLayoutLoaders === "function") {
    mermaid.registerLayoutLoaders(elkLayouts);
  }

  // Rendering is triggered explicitly by the page's own load handler.
  mermaid.initialize({startOnLoad:false});
</script>
|
||
<script src="https://cdn.jsdelivr.net/npm/d3@7.9.0/dist/d3.min.js"></script>
|
||
<script type="module">import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@10.6.1/dist/mermaid.esm.min.mjs";
|
||
|
||
const defaultStyle = document.createElement('style');
|
||
defaultStyle.textContent = `pre.mermaid {
|
||
/* Same as .mermaid-container > pre */
|
||
display: block;
|
||
width: 100%;
|
||
}
|
||
|
||
pre.mermaid > svg {
|
||
/* Same as .mermaid-container > pre > svg */
|
||
height: 500px;
|
||
width: 100%;
|
||
max-width: 100% !important;
|
||
}
|
||
`;
|
||
document.head.appendChild(defaultStyle);
|
||
|
||
const fullscreenStyle = document.createElement('style');
|
||
fullscreenStyle.textContent = `.mermaid-container {
|
||
display: flex;
|
||
flex-direction: row;
|
||
width: 100%;
|
||
}
|
||
|
||
.mermaid-container > pre {
|
||
display: block;
|
||
width: 100%;
|
||
}
|
||
|
||
.mermaid-container > pre > svg {
|
||
height: 500px;
|
||
width: 100%;
|
||
max-width: 100% !important;
|
||
}
|
||
|
||
.mermaid-fullscreen-btn {
|
||
width: 28px;
|
||
height: 28px;
|
||
background: rgba(255, 255, 255, 0.95);
|
||
border: 1px solid rgba(0, 0, 0, 0.3);
|
||
border-radius: 4px;
|
||
cursor: pointer;
|
||
display: flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
transition: all 0.2s;
|
||
box-shadow: 0 2px 6px rgba(0, 0, 0, 0.2);
|
||
font-size: 14px;
|
||
line-height: 1;
|
||
padding: 0;
|
||
color: #333;
|
||
}
|
||
|
||
.mermaid-fullscreen-btn:hover {
|
||
opacity: 100% !important;
|
||
background: rgba(255, 255, 255, 1);
|
||
box-shadow: 0 3px 10px rgba(0, 0, 0, 0.3);
|
||
transform: scale(1.1);
|
||
}
|
||
|
||
.mermaid-fullscreen-btn.dark-theme {
|
||
background: rgba(50, 50, 50, 0.95);
|
||
border: 1px solid rgba(255, 255, 255, 0.3);
|
||
color: #e0e0e0;
|
||
}
|
||
|
||
.mermaid-fullscreen-btn.dark-theme:hover {
|
||
background: rgba(60, 60, 60, 1);
|
||
box-shadow: 0 3px 10px rgba(255, 255, 255, 0.2);
|
||
}
|
||
|
||
.mermaid-fullscreen-modal {
|
||
display: none;
|
||
position: fixed !important;
|
||
top: 0 !important;
|
||
left: 0 !important;
|
||
width: 95vw;
|
||
height: 100vh;
|
||
background: rgba(255, 255, 255, 0.98);
|
||
z-index: 9999;
|
||
padding: 20px;
|
||
overflow: auto;
|
||
}
|
||
|
||
.mermaid-fullscreen-modal.dark-theme {
|
||
background: rgba(0, 0, 0, 0.98);
|
||
}
|
||
|
||
.mermaid-fullscreen-modal.active {
|
||
display: flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
}
|
||
|
||
.mermaid-container-fullscreen {
|
||
position: relative;
|
||
width: 95vw;
|
||
height: 90vh;
|
||
max-width: 95vw;
|
||
max-height: 90vh;
|
||
background: white;
|
||
border-radius: 8px;
|
||
padding: 20px;
|
||
box-shadow: 0 10px 40px rgba(0, 0, 0, 0.3);
|
||
overflow: auto;
|
||
display: flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
}
|
||
|
||
.mermaid-container-fullscreen.dark-theme {
|
||
background: #1a1a1a;
|
||
box-shadow: 0 10px 40px rgba(0, 0, 0, 0.8);
|
||
}
|
||
|
||
.mermaid-container-fullscreen pre.mermaid {
|
||
width: 100%;
|
||
height: 100%;
|
||
display: flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
}
|
||
|
||
.mermaid-container-fullscreen .mermaid svg {
|
||
height: 100% !important;
|
||
width: 100% !important;
|
||
cursor: grab;
|
||
}
|
||
|
||
.mermaid-fullscreen-close {
|
||
position: fixed !important;
|
||
top: 20px !important;
|
||
right: 20px !important;
|
||
width: 40px;
|
||
height: 40px;
|
||
background: rgba(255, 255, 255, 0.95);
|
||
border: 1px solid rgba(0, 0, 0, 0.2);
|
||
border-radius: 50%;
|
||
cursor: pointer;
|
||
z-index: 10000;
|
||
display: flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
|
||
transition: all 0.2s;
|
||
font-size: 24px;
|
||
line-height: 1;
|
||
color: #333;
|
||
}
|
||
|
||
.mermaid-fullscreen-close:hover {
|
||
background: white;
|
||
box-shadow: 0 6px 16px rgba(0, 0, 0, 0.4);
|
||
transform: scale(1.1);
|
||
}
|
||
|
||
.mermaid-fullscreen-close.dark-theme {
|
||
background: rgba(50, 50, 50, 0.95);
|
||
border: 1px solid rgba(255, 255, 255, 0.2);
|
||
color: #e0e0e0;
|
||
}
|
||
|
||
.mermaid-fullscreen-close.dark-theme:hover {
|
||
background: rgba(60, 60, 60, 1);
|
||
box-shadow: 0 6px 16px rgba(255, 255, 255, 0.2);
|
||
}
|
||
|
||
.mermaid-fullscreen-modal .mermaid-fullscreen-btn {
|
||
display: none !important;
|
||
}`;
|
||
document.head.appendChild(fullscreenStyle);
|
||
|
||
// Detect if page has dark background
|
||
/**
 * Report whether the page is currently rendered on a dark background.
 *
 * Reads the computed background colour of <body>. When it has the
 * `rgb(r, g, b` form, the channels are combined into a weighted brightness
 * (0.299 R + 0.587 G + 0.114 B) and values below 128 count as dark.
 * Any other colour format (e.g. `rgba(...)`, keywords) yields false.
 *
 * @returns {boolean} true when the body background is dark, false otherwise.
 */
const isDarkTheme = () => {
    const bgColor = window.getComputedStyle(document.body).backgroundColor;
    const match = bgColor.match(/rgb\((\d+),\s*(\d+),\s*(\d+)/);
    if (!match) {
        return false;
    }

    // Capture groups 1..3 hold the decimal red, green and blue channels.
    const [r, g, b] = match.slice(1, 4).map((channel) => parseInt(channel, 10));
    const brightness = (r * 299 + g * 587 + b * 114) / 1000;
    return brightness < 128;
};
|
||
|
||
/**
 * Render all Mermaid diagrams, then attach the fullscreen viewer to each one.
 *
 * Steps:
 *   1. Run Mermaid and, if not every `.mermaid` element is marked
 *      `data-processed='true'` yet, retry this function after 200 ms.
 *   2. Create one shared modal (dialog role, close button, content holder).
 *   3. Wrap every diagram in a `.mermaid-container` and add a button that
 *      clones the diagram into the modal.
 *   4. Keep the `dark-theme` classes in sync with attribute changes on
 *      <html> and <body>.
 *
 * The string comparisons such as `"False" === "True"` are literal constants in
 * this page (presumably rendered from build configuration — confirm); they
 * are kept verbatim so the generated structure stays recognisable.
 */
const load = async () => {
    await mermaid.run();

    const all_mermaids = document.querySelectorAll(".mermaid");
    const mermaids_processed = document.querySelectorAll(".mermaid[data-processed='true']");

    if ("False" === "True") {
        // D3 zoom branch: disabled on this page by the constant condition above.
        const mermaids_to_add_zoom = -1 === -1 ? all_mermaids.length : -1;
        if(mermaids_to_add_zoom > 0) {
            var svgs = d3.selectAll("");
            if(all_mermaids.length !== mermaids_processed.length) {
                setTimeout(load, 200);
                return;
            } else if(svgs.size() !== mermaids_to_add_zoom) {
                setTimeout(load, 200);
                return;
            } else {
                svgs.each(function() {
                    var svg = d3.select(this);
                    // Wrap the existing SVG content in a group that receives the zoom transform.
                    svg.html("<g class='wrapper'>" + svg.html() + "</g>");
                    var inner = svg.select("g");
                    var zoom = d3.zoom().on("zoom", function(event) {
                        inner.attr("transform", event.transform);
                    });
                    svg.call(zoom);
                });
            }
        }
    } else if(all_mermaids.length !== mermaids_processed.length) {
        // Wait for mermaid to process all diagrams
        setTimeout(load, 200);
        return;
    }

    const darkTheme = isDarkTheme();

    // Stop here if not adding fullscreen capability
    if ("True" !== "True") return;

    // One modal is shared by every diagram on the page.
    const modal = document.createElement('div');
    modal.className = 'mermaid-fullscreen-modal' + (darkTheme ? ' dark-theme' : '');
    modal.setAttribute('role', 'dialog');
    modal.setAttribute('aria-modal', 'true');
    modal.setAttribute('aria-label', 'Fullscreen diagram viewer');
    modal.innerHTML = `
        <button class="mermaid-fullscreen-close${darkTheme ? ' dark-theme' : ''}" aria-label="Close fullscreen">✕</button>
        <div class="mermaid-container-fullscreen${darkTheme ? ' dark-theme' : ''}"></div>
    `;
    document.body.appendChild(modal);

    const modalContent = modal.querySelector('.mermaid-container-fullscreen');
    const closeBtn = modal.querySelector('.mermaid-fullscreen-close');

    // Scroll position captured when the modal opens and restored when it closes.
    let previousScrollOffset = [window.scrollX, window.scrollY];

    const closeModal = () => {
        modal.classList.remove('active');
        modalContent.innerHTML = '';
        document.body.style.overflow = '';
        window.scrollTo({left: previousScrollOffset[0], top: previousScrollOffset[1], behavior: 'instant'});
    };

    closeBtn.addEventListener('click', closeModal);
    modal.addEventListener('click', (e) => {
        // Only a click on the backdrop itself closes the modal, not clicks inside it.
        if (e.target === modal) closeModal();
    });
    document.addEventListener('keydown', (e) => {
        if (e.key === 'Escape' && modal.classList.contains('active')) {
            closeModal();
        }
    });

    const allButtons = [];

    document.querySelectorAll('.mermaid').forEach((mermaidDiv) => {
        // Skip diagrams that are already wrapped or that live inside the modal.
        if (mermaidDiv.parentNode.classList.contains('mermaid-container') ||
            mermaidDiv.closest('.mermaid-fullscreen-modal')) {
            return;
        }

        const container = document.createElement('div');
        container.className = 'mermaid-container';
        mermaidDiv.parentNode.insertBefore(container, mermaidDiv);
        container.appendChild(mermaidDiv);

        const fullscreenBtn = document.createElement('button');
        fullscreenBtn.className = 'mermaid-fullscreen-btn' + (darkTheme ? ' dark-theme' : '');
        fullscreenBtn.setAttribute('aria-label', 'View diagram in fullscreen');
        fullscreenBtn.textContent = '⛶';
        fullscreenBtn.style.opacity = '50%';

        // Calculate dynamic position based on diagram's margin and padding
        const diagramStyle = window.getComputedStyle(mermaidDiv);
        const marginTop = parseFloat(diagramStyle.marginTop) || 0;
        const marginRight = parseFloat(diagramStyle.marginRight) || 0;
        const paddingTop = parseFloat(diagramStyle.paddingTop) || 0;
        const paddingRight = parseFloat(diagramStyle.paddingRight) || 0;
        fullscreenBtn.style.top = `${marginTop + paddingTop + 4}px`;
        fullscreenBtn.style.right = `${marginRight + paddingRight + 4}px`;

        fullscreenBtn.addEventListener('click', () => {
            // Fixed: the original stored `window.scroll` (a function) as the
            // horizontal offset, so the x position could not be restored.
            previousScrollOffset = [window.scrollX, window.scrollY];
            const clone = mermaidDiv.cloneNode(true);
            modalContent.innerHTML = '';
            modalContent.appendChild(clone);

            const svg = clone.querySelector('svg');
            if (svg) {
                // Drop fixed dimensions so the cloned SVG can scale with the modal.
                svg.removeAttribute('width');
                svg.removeAttribute('height');
                svg.style.width = '100%';
                svg.style.height = 'auto';
                svg.style.maxWidth = '100%';
                // Fixed: the original assigned the non-existent `sdisplay` property.
                svg.style.display = 'block';

                if ("False" === "True") {
                    // D3 zoom inside the modal: disabled by the constant condition above.
                    setTimeout(() => {
                        const g = svg.querySelector('g');
                        if (g) {
                            var svgD3 = d3.select(svg);
                            svgD3.html("<g class='wrapper'>" + svgD3.html() + "</g>");
                            var inner = svgD3.select("g");
                            var zoom = d3.zoom().on("zoom", function(event) {
                                inner.attr("transform", event.transform);
                            });
                            svgD3.call(zoom);
                        }
                    }, 100);
                }
            }

            modal.classList.add('active');
            // Prevent the page behind the modal from scrolling while it is open.
            document.body.style.overflow = 'hidden';
        });

        container.appendChild(fullscreenBtn);
        allButtons.push(fullscreenBtn);
    });

    // Update theme classes when theme changes
    const updateTheme = () => {
        const dark = isDarkTheme();
        allButtons.forEach(btn => {
            if (dark) {
                btn.classList.add('dark-theme');
            } else {
                btn.classList.remove('dark-theme');
            }
        });
        if (dark) {
            modal.classList.add('dark-theme');
            modalContent.classList.add('dark-theme');
            closeBtn.classList.add('dark-theme');
        } else {
            modal.classList.remove('dark-theme');
            modalContent.classList.remove('dark-theme');
            closeBtn.classList.remove('dark-theme');
        }
    };

    // Watch for theme changes
    const observer = new MutationObserver(updateTheme);
    observer.observe(document.documentElement, {
        attributes: true,
        attributeFilter: ['class', 'style', 'data-theme']
    });
    observer.observe(document.body, {
        attributes: true,
        attributeFilter: ['class', 'style']
    });
};
|
||
|
||
window.addEventListener("load", load);
|
||
</script>
|
||
<script>DOCUMENTATION_OPTIONS.pagename = 'modules/15_quantization_ABOUT';</script>
|
||
<script src="../_static/ml-timeline.js?v=76e9b3e3"></script>
|
||
<script src="../_static/wip-banner.js?v=5357532b"></script>
|
||
<script src="../_static/marimo-badges.js?v=1e5d2842"></script>
|
||
<script src="../_static/sidebar-link.js?v=404b701b"></script>
|
||
<script src="../_static/hero-carousel.js?v=10341d2a"></script>
|
||
<link rel="icon" href="../_static/favicon.svg"/>
|
||
<link rel="index" title="Index" href="../genindex.html" />
|
||
<link rel="search" title="Search" href="../search.html" />
|
||
<link rel="next" title="16. Compression - Pruning and Model Compression" href="16_compression_ABOUT.html" />
|
||
<link rel="prev" title="14. Profiling - Performance Measurement for ML Systems" href="14_profiling_ABOUT.html" />
|
||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||
<meta name="docsearch:language" content="en"/>
|
||
</head>
|
||
|
||
|
||
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
||
|
||
|
||
|
||
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
|
||
|
||
<div id="pst-scroll-pixel-helper"></div>
|
||
|
||
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
||
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
|
||
|
||
|
||
<input type="checkbox"
|
||
class="sidebar-toggle"
|
||
id="pst-primary-sidebar-checkbox"/>
|
||
<label class="overlay overlay-primary" for="pst-primary-sidebar-checkbox"></label>
|
||
|
||
<input type="checkbox"
|
||
class="sidebar-toggle"
|
||
id="pst-secondary-sidebar-checkbox"/>
|
||
<label class="overlay overlay-secondary" for="pst-secondary-sidebar-checkbox"></label>
|
||
|
||
<div class="search-button__wrapper">
|
||
<div class="search-button__overlay"></div>
|
||
<div class="search-button__search-container">
|
||
<form class="bd-search d-flex align-items-center"
|
||
action="../search.html"
|
||
method="get">
|
||
<i class="fa-solid fa-magnifying-glass"></i>
|
||
<input type="search"
|
||
class="form-control"
|
||
name="q"
|
||
id="search-input"
|
||
placeholder="Search this book..."
|
||
aria-label="Search this book..."
|
||
autocomplete="off"
|
||
autocorrect="off"
|
||
autocapitalize="off"
|
||
spellcheck="false"/>
|
||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
||
</form></div>
|
||
</div>
|
||
|
||
<div class="pst-async-banner-revealer d-none">
|
||
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
|
||
</div>
|
||
|
||
|
||
<header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
|
||
</header>
|
||
|
||
|
||
<div class="bd-container">
|
||
<div class="bd-container__inner bd-page-width">
|
||
|
||
|
||
|
||
<div class="bd-sidebar-primary bd-sidebar">
|
||
|
||
|
||
|
||
<div class="sidebar-header-items sidebar-primary__section">
|
||
|
||
|
||
|
||
|
||
</div>
|
||
|
||
<div class="sidebar-primary-items__start sidebar-primary__section">
|
||
<div class="sidebar-primary-item">
|
||
|
||
|
||
|
||
|
||
|
||
<a class="navbar-brand logo" href="../intro.html">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<img src="../_static/logo-tinytorch.png" class="logo__image only-light" alt="Tiny🔥Torch - Home"/>
|
||
<script>document.write(`<img src="../_static/logo-tinytorch.png" class="logo__image only-dark" alt="Tiny🔥Torch - Home"/>`);</script>
|
||
|
||
|
||
</a></div>
|
||
<div class="sidebar-primary-item">
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="fa-solid fa-magnifying-glass"></i>
|
||
<span class="search-button__default-text">Search</span>
|
||
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
|
||
</button>
|
||
`);
|
||
</script></div>
|
||
<div class="sidebar-primary-item"><nav class="bd-links bd-docs-nav" aria-label="Main">
|
||
<div class="bd-toc-item navbar-nav active">
|
||
|
||
<ul class="nav bd-sidenav bd-sidenav__home-link">
|
||
<li class="toctree-l1">
|
||
<a class="reference internal" href="../intro.html">
|
||
Getting Started
|
||
</a>
|
||
</li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🚀 Getting Started</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../quickstart-guide.html">Quick Start Guide</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../student-workflow.html">Student Workflow</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../usage-paths/classroom-use.html">For Instructors</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../instructor-guide.html">Instructor Guide</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../usage-paths/ta-guide.html">TA Guide</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../usage-paths/team-onboarding.html">Team Onboarding</a></li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🏗 Foundation Tier (01-07)</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../tiers/foundation.html">📖 Tier Overview</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="01_tensor_ABOUT.html">01. Tensor</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="02_activations_ABOUT.html">02. Activations</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="03_layers_ABOUT.html">03. Layers</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="04_losses_ABOUT.html">04. Losses</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="05_autograd_ABOUT.html">05. Autograd</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="06_optimizers_ABOUT.html">06. Optimizers</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="07_training_ABOUT.html">07. Training</a></li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🏛️ Architecture Tier (08-13)</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../tiers/architecture.html">📖 Tier Overview</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="08_dataloader_ABOUT.html">08. DataLoader</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="09_spatial_ABOUT.html">09. Convolutions</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="10_tokenization_ABOUT.html">10. Tokenization</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="11_embeddings_ABOUT.html">11. Embeddings</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="12_attention_ABOUT.html">12. Attention</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="13_transformers_ABOUT.html">13. Transformers</a></li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">⏱️ Optimization Tier (14-19)</span></p>
|
||
<ul class="current nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../tiers/optimization.html">📖 Tier Overview</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="14_profiling_ABOUT.html">14. Profiling</a></li>
|
||
<li class="toctree-l1 current active"><a class="current reference internal" href="#">15. Quantization</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="16_compression_ABOUT.html">16. Compression</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="17_memoization_ABOUT.html">17. Memoization</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="18_acceleration_ABOUT.html">18. Acceleration</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="19_benchmarking_ABOUT.html">19. Benchmarking</a></li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🏅 Capstone Competition</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../tiers/olympics.html">📖 Competition Overview</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="20_capstone_ABOUT.html">20. Torch Olympics</a></li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🧭 Course Orientation</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../chapters/00-introduction.html">Course Structure</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../prerequisites.html">Prerequisites & Resources</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../chapters/learning-journey.html">Learning Journey</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../chapters/milestones.html">Historical Milestones</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../faq.html">FAQ</a></li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🛠️ TITO CLI Reference</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../tito/overview.html">Command Overview</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../tito/modules.html">Module Workflow</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../tito/milestones.html">Milestone System</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../tito/data.html">Progress & Data</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../tito/troubleshooting.html">Troubleshooting</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../datasets.html">Datasets Guide</a></li>
|
||
</ul>
|
||
<p aria-level="2" class="caption" role="heading"><span class="caption-text">🤝 Community</span></p>
|
||
<ul class="nav bd-sidenav">
|
||
<li class="toctree-l1"><a class="reference internal" href="../community.html">Ecosystem</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../resources.html">Learning Resources</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../credits.html">Credits & Acknowledgments</a></li>
|
||
</ul>
|
||
|
||
</div>
|
||
</nav></div>
|
||
</div>
|
||
|
||
|
||
<div class="sidebar-primary-items__end sidebar-primary__section">
|
||
</div>
|
||
|
||
<div id="rtd-footer-container"></div>
|
||
|
||
|
||
</div>
|
||
|
||
<main id="main-content" class="bd-main" role="main">
|
||
|
||
|
||
|
||
<div class="sbt-scroll-pixel-helper"></div>
|
||
|
||
<div class="bd-content">
|
||
<div class="bd-article-container">
|
||
|
||
<div class="bd-header-article d-print-none">
|
||
<div class="header-article-items header-article__inner">
|
||
|
||
<div class="header-article-items__start">
|
||
|
||
<div class="header-article-item"><button class="sidebar-toggle primary-toggle btn btn-sm" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<span class="fa-solid fa-bars"></span>
|
||
</button></div>
|
||
|
||
</div>
|
||
|
||
|
||
<div class="header-article-items__end">
|
||
|
||
<div class="header-article-item">
|
||
|
||
<div class="article-header-buttons">
|
||
|
||
|
||
|
||
|
||
|
||
<div class="dropdown dropdown-source-buttons">
|
||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Source repositories">
|
||
<i class="fab fa-github"></i>
|
||
</button>
|
||
<ul class="dropdown-menu">
|
||
|
||
|
||
|
||
<li><a href="https://github.com/mlsysbook/TinyTorch" target="_blank"
|
||
class="btn btn-sm btn-source-repository-button dropdown-item"
|
||
title="Source repository"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fab fa-github"></i>
|
||
</span>
|
||
<span class="btn__text-container">Repository</span>
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
<li><a href="https://github.com/mlsysbook/TinyTorch/edit/main/site/modules/15_quantization_ABOUT.md" target="_blank"
|
||
class="btn btn-sm btn-source-edit-button dropdown-item"
|
||
title="Suggest edit"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-pencil-alt"></i>
|
||
</span>
|
||
<span class="btn__text-container">Suggest edit</span>
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
<li><a href="https://github.com/mlsysbook/TinyTorch/issues/new?title=Issue%20on%20page%20%2Fmodules/15_quantization_ABOUT.html&body=Your%20issue%20content%20here." target="_blank"
|
||
class="btn btn-sm btn-source-issues-button dropdown-item"
|
||
title="Open an issue"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-lightbulb"></i>
|
||
</span>
|
||
<span class="btn__text-container">Open issue</span>
|
||
</a>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<div class="dropdown dropdown-download-buttons">
|
||
<button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
|
||
<i class="fas fa-download"></i>
|
||
</button>
|
||
<ul class="dropdown-menu">
|
||
|
||
|
||
|
||
<li><a href="../_sources/modules/15_quantization_ABOUT.md" target="_blank"
|
||
class="btn btn-sm btn-download-source-button dropdown-item"
|
||
title="Download source file"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-file"></i>
|
||
</span>
|
||
<span class="btn__text-container">.md</span>
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
<li>
|
||
<button onclick="window.print()"
|
||
class="btn btn-sm btn-download-pdf-button dropdown-item"
|
||
title="Print to PDF"
|
||
data-bs-placement="left" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-file-pdf"></i>
|
||
</span>
|
||
<span class="btn__text-container">.pdf</span>
|
||
</button>
|
||
</li>
|
||
|
||
</ul>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
<button onclick="toggleFullScreen()"
|
||
class="btn btn-sm btn-fullscreen-button"
|
||
title="Fullscreen mode"
|
||
data-bs-placement="bottom" data-bs-toggle="tooltip"
|
||
>
|
||
|
||
|
||
<span class="btn__icon-container">
|
||
<i class="fas fa-expand"></i>
|
||
</span>
|
||
|
||
</button>
|
||
|
||
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light"></i>
|
||
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark"></i>
|
||
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"></i>
|
||
</button>
|
||
`);
|
||
</script>
|
||
|
||
|
||
<script>
|
||
document.write(`
|
||
<button class="btn btn-sm pst-navbar-icon search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<i class="fa-solid fa-magnifying-glass fa-lg"></i>
|
||
</button>
|
||
`);
|
||
</script>
|
||
<button class="sidebar-toggle secondary-toggle btn btn-sm" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
|
||
<span class="fa-solid fa-list"></span>
|
||
</button>
|
||
</div></div>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div id="jb-print-docs-body" class="onlyprint">
|
||
<h1>15. Quantization - Reduced Precision for Efficiency</h1>
|
||
<!-- Table of contents -->
|
||
<div id="print-main-content">
|
||
<div id="jb-print-toc">
|
||
|
||
<div>
|
||
<h2> Contents </h2>
|
||
</div>
|
||
<nav aria-label="Page">
|
||
<ul class="visible nav section-nav flex-column">
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#learning-objectives">Learning Objectives</a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#build-use-optimize">Build → Use → Optimize</a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-guide">Implementation Guide</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantization-flow-fp32-int8">Quantization Flow: FP32 → INT8</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#what-youre-actually-building-educational-quantization">What You’re Actually Building (Educational Quantization)</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#core-quantization-mathematics">Core Quantization Mathematics</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#calibration-the-critical-step">Calibration - The Critical Step</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#per-tensor-vs-per-channel-quantization">Per-Tensor vs Per-Channel Quantization</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantizedlinear-quantized-neural-network-layer">QuantizedLinear - Quantized Neural Network Layer</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-level-quantization">Model-Level Quantization</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#getting-started">Getting Started</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prerequisites">Prerequisites</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#development-workflow">Development Workflow</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#testing">Testing</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comprehensive-test-suite">Comprehensive Test Suite</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#test-coverage-areas">Test Coverage Areas</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#inline-testing-quantization-analysis">Inline Testing & Quantization Analysis</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#manual-testing-examples">Manual Testing Examples</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#systems-thinking-questions">Systems Thinking Questions</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#real-world-applications">Real-World Applications</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantization-mathematics">Quantization Mathematics</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#production-deployment-characteristics">Production Deployment Characteristics</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ready-to-build">Ready to Build?</a></li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div id="searchbox"></div>
|
||
<article class="bd-article">
|
||
|
||
<section id="quantization-reduced-precision-for-efficiency">
|
||
<h1>15. Quantization - Reduced Precision for Efficiency<a class="headerlink" href="#quantization-reduced-precision-for-efficiency" title="Link to this heading">#</a></h1>
|
||
<p><strong>OPTIMIZATION TIER</strong> | Difficulty: ⭐⭐⭐ (3/4) | Time: 5-6 hours</p>
|
||
<section id="overview">
|
||
<h2>Overview<a class="headerlink" href="#overview" title="Link to this heading">#</a></h2>
|
||
<p>This module implements quantization fundamentals: converting FP32 tensors to INT8 representation to reduce memory by 4×. You’ll build the mathematics of scale/zero-point quantization, implement quantized linear layers, and measure accuracy-efficiency trade-offs. CRITICAL HONESTY: You’re implementing quantization math in Python, NOT actual hardware INT8 operations. This teaches the principles that enable TensorFlow Lite/PyTorch Mobile deployment, but real speedups require specialized hardware (Edge TPU, Neural Engine) or compiled frameworks with INT8 kernels. Your implementation will be 4× more memory-efficient but not faster - understanding WHY teaches you what production quantization frameworks must optimize.</p>
|
||
</section>
|
||
<section id="learning-objectives">
|
||
<h2>Learning Objectives<a class="headerlink" href="#learning-objectives" title="Link to this heading">#</a></h2>
|
||
<p>By the end of this module, you will be able to:</p>
|
||
<ul class="simple">
|
||
<li><p><strong>Quantization Mathematics</strong>: Implement symmetric and asymmetric INT8 quantization with scale/zero-point parameter calculation</p></li>
|
||
<li><p><strong>Calibration Strategies</strong>: Design percentile-based calibration to minimize accuracy loss when selecting quantization parameters</p></li>
|
||
<li><p><strong>Memory-Accuracy Trade-offs</strong>: Measure when 4× memory reduction justifies 0.5-2% accuracy degradation for deployment</p></li>
|
||
<li><p><strong>Production Reality</strong>: Distinguish between educational quantization (Python simulation) vs production INT8 (hardware acceleration, kernel fusion)</p></li>
|
||
<li><p><strong>When to Quantize</strong>: Recognize deployment scenarios where quantization is mandatory (mobile/edge) vs optional (cloud serving)</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="build-use-optimize">
|
||
<h2>Build → Use → Optimize<a class="headerlink" href="#build-use-optimize" title="Link to this heading">#</a></h2>
|
||
<p>This module follows TinyTorch’s <strong>Build → Use → Optimize</strong> framework:</p>
|
||
<ol class="arabic simple">
|
||
<li><p><strong>Build</strong>: Implement INT8 quantization/dequantization, calibration logic, QuantizedLinear layers</p></li>
|
||
<li><p><strong>Use</strong>: Quantize trained models, measure accuracy degradation vs memory savings on MNIST/CIFAR</p></li>
|
||
<li><p><strong>Optimize</strong>: Analyze the accuracy-efficiency frontier - when does quantization enable deployment vs hurt accuracy unacceptably?</p></li>
|
||
</ol>
|
||
</section>
|
||
<section id="implementation-guide">
|
||
<h2>Implementation Guide<a class="headerlink" href="#implementation-guide" title="Link to this heading">#</a></h2>
|
||
<section id="quantization-flow-fp32-int8">
|
||
<h3>Quantization Flow: FP32 → INT8<a class="headerlink" href="#quantization-flow-fp32-int8" title="Link to this heading">#</a></h3>
|
||
<p>Quantization compresses weights by reducing precision, trading accuracy for memory efficiency:</p>
|
||
<pre class="mermaid">
|
||
graph LR
|
||
A[FP32 Weight<br/>4 bytes<br/>-3.14159] --> B[Quantize<br/>scale + zero_point]
|
||
B --> C[INT8 Weight<br/>1 byte<br/>-126]
|
||
C --> D[Dequantize<br/>Inference]
|
||
D --> E[FP32 Compute<br/>Result]
|
||
|
||
style A fill:#e3f2fd
|
||
style B fill:#fff3e0
|
||
style C fill:#f3e5f5
|
||
style D fill:#ffe0b2
|
||
style E fill:#f0fdf4
|
||
</pre><p><strong>Flow</strong>: Original FP32 → Calibrate scale → Store as INT8 (4× smaller) → Dequantize for computation → FP32 result</p>
|
||
</section>
|
||
<section id="what-youre-actually-building-educational-quantization">
|
||
<h3>What You’re Actually Building (Educational Quantization)<a class="headerlink" href="#what-youre-actually-building-educational-quantization" title="Link to this heading">#</a></h3>
|
||
<p><strong>Your Implementation:</strong></p>
|
||
<ul class="simple">
|
||
<li><p>Quantization math: FP32 → INT8 conversion with scale/zero-point</p></li>
|
||
<li><p>QuantizedLinear: Store weights as INT8, compute in simulated quantized arithmetic</p></li>
|
||
<li><p>Calibration: Find optimal scale parameters from representative data</p></li>
|
||
<li><p>Memory measurement: Verify 4× reduction (32 bits → 8 bits)</p></li>
|
||
</ul>
|
||
<p><strong>What You’re NOT Building:</strong></p>
|
||
<ul class="simple">
|
||
<li><p>Actual INT8 hardware operations (requires CPU VNNI, ARM NEON, GPU Tensor Cores)</p></li>
|
||
<li><p>Kernel fusion (eliminating quantize/dequantize overhead)</p></li>
|
||
<li><p>Mixed-precision execution graphs (FP32 for sensitive ops, INT8 for matmul)</p></li>
|
||
<li><p>Production deployment pipelines (TensorFlow Lite converter, ONNX Runtime optimization)</p></li>
|
||
</ul>
|
||
<p><strong>Why This Matters:</strong> Understanding quantization math is essential. But knowing that production speedups require hardware acceleration + compiler optimization prevents unrealistic expectations. Your 4× memory reduction is real; your lack of speedup teaches why TensorFlow Lite needs custom kernels.</p>
|
||
</section>
|
||
<section id="core-quantization-mathematics">
|
||
<h3>Core Quantization Mathematics<a class="headerlink" href="#core-quantization-mathematics" title="Link to this heading">#</a></h3>
|
||
<p><strong>Symmetric Quantization (Zero-Point = 0)</strong></p>
|
||
<p>Assumes data is centered around zero (common after BatchNorm):</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Quantization: FP32 → INT8</span>
|
||
<span class="n">scale</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">tensor</span><span class="p">))</span> <span class="o">/</span> <span class="mf">127.0</span> <span class="c1"># Scale factor</span>
|
||
<span class="n">quantized</span> <span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="n">tensor</span> <span class="o">/</span> <span class="n">scale</span><span class="p">)</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="o">-</span><span class="mi">128</span><span class="p">,</span> <span class="mi">127</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">int8</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Dequantization: INT8 → FP32</span>
|
||
<span class="n">dequantized</span> <span class="o">=</span> <span class="n">quantized</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">float32</span><span class="p">)</span> <span class="o">*</span> <span class="n">scale</span>
|
||
</pre></div>
|
||
</div>
|
||
<ul class="simple">
|
||
<li><p><strong>Range</strong>: INT8 is [-128, 127] (256 values)</p></li>
|
||
<li><p><strong>Scale</strong>: Maps the largest absolute FP32 value to 127</p></li>
|
||
<li><p><strong>Zero-point</strong>: Always 0 (symmetric around origin)</p></li>
|
||
<li><p><strong>Use case</strong>: Weights after normalization, activations after BatchNorm</p></li>
|
||
</ul>
|
||
<p><strong>Asymmetric Quantization (With Zero-Point)</strong></p>
|
||
<p>Handles arbitrary data ranges (e.g., activations after ReLU: [0, max]):</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Quantization: FP32 → INT8</span>
|
||
<span class="n">min_val</span><span class="p">,</span> <span class="n">max_val</span> <span class="o">=</span> <span class="n">tensor</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">tensor</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
|
||
<span class="n">scale</span> <span class="o">=</span> <span class="p">(</span><span class="n">max_val</span> <span class="o">-</span> <span class="n">min_val</span><span class="p">)</span> <span class="o">/</span> <span class="mf">255.0</span>
|
||
<span class="n">zero_point</span> <span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="o">-</span><span class="mi">128</span> <span class="o">-</span> <span class="n">min_val</span> <span class="o">/</span> <span class="n">scale</span><span class="p">)</span> <span class="c1"># min_val → -128, max_val → 127</span>
|
||
<span class="n">quantized</span> <span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="n">tensor</span> <span class="o">/</span> <span class="n">scale</span> <span class="o">+</span> <span class="n">zero_point</span><span class="p">)</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="o">-</span><span class="mi">128</span><span class="p">,</span> <span class="mi">127</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">int8</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Dequantization: INT8 → FP32</span>
|
||
<span class="n">dequantized</span> <span class="o">=</span> <span class="p">(</span><span class="n">quantized</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">float32</span><span class="p">)</span> <span class="o">-</span> <span class="n">zero_point</span><span class="p">)</span> <span class="o">*</span> <span class="n">scale</span>
|
||
</pre></div>
|
||
</div>
|
||
<ul class="simple">
|
||
<li><p><strong>Range</strong>: Uses full [-128, 127] even if data is [0, 5]</p></li>
|
||
<li><p><strong>Scale</strong>: Maps data range to INT8 range</p></li>
|
||
<li><p><strong>Zero-point</strong>: Offset ensuring FP32 zero maps to specific INT8 value</p></li>
|
||
<li><p><strong>Use case</strong>: ReLU activations, input images, any non-centered data</p></li>
|
||
</ul>
|
||
<p><strong>Trade-off:</strong> Symmetric is simpler (no zero-point storage/computation), asymmetric uses range more efficiently (better for skewed distributions).</p>
|
||
</section>
|
||
<section id="calibration-the-critical-step">
|
||
<h3>Calibration - The Critical Step<a class="headerlink" href="#calibration-the-critical-step" title="Link to this heading">#</a></h3>
|
||
<p>Quantization quality depends entirely on scale/zero-point selection. Poor choices destroy accuracy.</p>
|
||
<p><strong>Naive Approach (Don’t Do This):</strong></p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Use global min/max from training data</span>
|
||
<span class="n">scale</span> <span class="o">=</span> <span class="p">(</span><span class="n">tensor_max</span> <span class="o">-</span> <span class="n">tensor_min</span><span class="p">)</span> <span class="o">/</span> <span class="mi">255</span>
|
||
<span class="c1"># Problem: Single outlier wastes most INT8 range</span>
|
||
<span class="c1"># Example: data in [0, 5] but one outlier at 100 → scale = 100/255</span>
|
||
<span class="c1"># Result: all typical data in [0, 5] maps to only ~13 of the 256 INT8 levels (5/100 * 255 ≈ 13)</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>Calibration Approach (Correct):</strong></p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Use percentile-based clipping</span>
|
||
<span class="n">max_val</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">percentile</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">calibration_data</span><span class="p">),</span> <span class="mf">99.9</span><span class="p">)</span>
|
||
<span class="n">scale</span> <span class="o">=</span> <span class="n">max_val</span> <span class="o">/</span> <span class="mi">127</span>
|
||
<span class="c1"># Clips 0.1% outliers, uses INT8 range efficiently</span>
|
||
<span class="c1"># 99.9th percentile ignores rare outliers, preserves typical range</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>Calibration Process:</strong></p>
|
||
<ol class="arabic simple">
|
||
<li><p>Collect 100-1000 samples of representative data (validation set)</p></li>
|
||
<li><p>For each layer, record activation statistics during forward passes</p></li>
|
||
<li><p>Compute percentile-based min/max (typically 99.9th percentile)</p></li>
|
||
<li><p>Calculate scale/zero-point from clipped statistics</p></li>
|
||
<li><p>Quantize weights/activations using calibrated parameters</p></li>
|
||
</ol>
|
||
<p><strong>Why It Works:</strong> Most activations follow normal-ish distributions. Outliers are rare but dominate min/max. Clipping 0.1% of outliers uses INT8 range 10-100× more efficiently with negligible accuracy loss.</p>
|
||
</section>
|
||
<section id="per-tensor-vs-per-channel-quantization">
|
||
<h3>Per-Tensor vs Per-Channel Quantization<a class="headerlink" href="#per-tensor-vs-per-channel-quantization" title="Link to this heading">#</a></h3>
|
||
<p><strong>Per-Tensor Quantization:</strong></p>
|
||
<ul class="simple">
|
||
<li><p>One scale/zero-point for entire weight tensor</p></li>
|
||
<li><p>Simple: store 2 parameters per layer</p></li>
|
||
<li><p>Example: Conv2D with 64×3×3×3 weights uses 1 scale, 1 zero-point</p></li>
|
||
</ul>
|
||
<p><strong>Per-Channel Quantization:</strong></p>
|
||
<ul class="simple">
|
||
<li><p>Separate scale/zero-point per output channel</p></li>
|
||
<li><p>Better accuracy: each channel uses its natural range</p></li>
|
||
<li><p>Example: Conv2D with 64 output channels uses 64 scales, 64 zero-points</p></li>
|
||
<li><p>Overhead: 128 extra parameters (64 scales + 64 zero-points)</p></li>
|
||
</ul>
|
||
<p><strong>When to Use Per-Channel:</strong></p>
|
||
<ul class="simple">
|
||
<li><p>Weight magnitudes vary significantly across channels (common in Conv layers)</p></li>
|
||
<li><p>Accuracy improvement (0.5-1.5%) justifies 0.1-0.5% memory overhead</p></li>
|
||
<li><p>Production frameworks (PyTorch, TensorFlow Lite) default to per-channel for Conv/Linear</p></li>
|
||
</ul>
|
||
<p><strong>Trade-off Table:</strong></p>
|
||
<div class="pst-scrollable-table-container"><table class="table">
|
||
<thead>
|
||
<tr class="row-odd"><th class="head"><p>Quantization Scheme</p></th>
|
||
<th class="head"><p>Parameters</p></th>
|
||
<th class="head"><p>Accuracy</p></th>
|
||
<th class="head"><p>Complexity</p></th>
|
||
<th class="head"><p>Use Case</p></th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr class="row-even"><td><p>Per-Tensor</p></td>
|
||
<td><p>2 per layer</p></td>
|
||
<td><p>Baseline</p></td>
|
||
<td><p>Simple</p></td>
|
||
<td><p>Fast prototyping, small models</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>Per-Channel (Conv)</p></td>
|
||
<td><p>2N (N=channels)</p></td>
|
||
<td><p>+0.5-1.5%</p></td>
|
||
<td><p>Medium</p></td>
|
||
<td><p>Production Conv layers</p></td>
|
||
</tr>
|
||
<tr class="row-even"><td><p>Per-Channel (Linear)</p></td>
|
||
<td><p>2N (N=out_features)</p></td>
|
||
<td><p>+0.3-0.8%</p></td>
|
||
<td><p>Medium</p></td>
|
||
<td><p>Production Linear layers</p></td>
|
||
</tr>
|
||
<tr class="row-odd"><td><p>Mixed (Conv per-channel, Linear per-tensor)</p></td>
|
||
<td><p>Hybrid</p></td>
|
||
<td><p>+0.4-1.2%</p></td>
|
||
<td><p>Medium</p></td>
|
||
<td><p>Balanced approach</p></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</div>
|
||
</section>
|
||
<section id="quantizedlinear-quantized-neural-network-layer">
|
||
<h3>QuantizedLinear - Quantized Neural Network Layer<a class="headerlink" href="#quantizedlinear-quantized-neural-network-layer" title="Link to this heading">#</a></h3>
|
||
<p>Replaces regular Linear layer with quantized equivalent:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">QuantizedLinear</span><span class="p">:</span>
|
||
<span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">linear_layer</span><span class="p">:</span> <span class="n">Linear</span><span class="p">):</span>
|
||
<span class="c1"># Quantize weights at initialization</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">weights_int8</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">weight_scale</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">weight_zp</span> <span class="o">=</span> <span class="n">quantize_int8</span><span class="p">(</span><span class="n">linear_layer</span><span class="o">.</span><span class="n">weight</span><span class="p">)</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">bias_int8</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bias_scale</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bias_zp</span> <span class="o">=</span> <span class="n">quantize_int8</span><span class="p">(</span><span class="n">linear_layer</span><span class="o">.</span><span class="n">bias</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Store original FP32 for accuracy comparison</span>
|
||
<span class="bp">self</span><span class="o">.</span><span class="n">original_weight</span> <span class="o">=</span> <span class="n">linear_layer</span><span class="o">.</span><span class="n">weight</span>
|
||
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Tensor</span><span class="p">)</span> <span class="o">-></span> <span class="n">Tensor</span><span class="p">:</span>
|
||
<span class="c1"># EDUCATIONAL VERSION: Dequantize weights/bias → compute in FP32 → return FP32 result</span>
|
||
<span class="c1"># (Simulates quantization math but doesn't speed up computation)</span>
|
||
<span class="n">weight_fp32</span> <span class="o">=</span> <span class="n">dequantize_int8</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">weights_int8</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">weight_scale</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">weight_zp</span><span class="p">)</span>
|
||
<span class="n">bias_fp32</span> <span class="o">=</span> <span class="n">dequantize_int8</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">bias_int8</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bias_scale</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bias_zp</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Compute in FP32 (not actually faster - just lower precision storage)</span>
|
||
<span class="n">output</span> <span class="o">=</span> <span class="n">x</span> <span class="o">@</span> <span class="n">weight_fp32</span><span class="o">.</span><span class="n">T</span> <span class="o">+</span> <span class="n">bias_fp32</span>
|
||
<span class="k">return</span> <span class="n">output</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>What Happens in Production (TensorFlow Lite, PyTorch Mobile):</strong></p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Production quantized matmul (conceptual - happens in C++/assembly)</span>
|
||
<span class="k">def</span><span class="w"> </span><span class="nf">quantized_matmul_production</span><span class="p">(</span><span class="n">x_int8</span><span class="p">,</span> <span class="n">weight_int8</span><span class="p">,</span> <span class="n">x_scale</span><span class="p">,</span> <span class="n">weight_scale</span><span class="p">,</span> <span class="n">output_scale</span><span class="p">):</span>
|
||
<span class="c1"># 1. INT8 x INT8 matmul using VNNI/NEON/Tensor Cores (FAST)</span>
|
||
<span class="n">accum_int32</span> <span class="o">=</span> <span class="n">matmul_int8_hardware</span><span class="p">(</span><span class="n">x_int8</span><span class="p">,</span> <span class="n">weight_int8</span><span class="p">)</span> <span class="c1"># Specialized instruction</span>
|
||
|
||
<span class="c1"># 2. Requantize accumulated INT32 → INT8 output</span>
|
||
<span class="n">combined_scale</span> <span class="o">=</span> <span class="p">(</span><span class="n">x_scale</span> <span class="o">*</span> <span class="n">weight_scale</span><span class="p">)</span> <span class="o">/</span> <span class="n">output_scale</span>
|
||
<span class="n">output_int8</span> <span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="n">accum_int32</span> <span class="o">*</span> <span class="n">combined_scale</span><span class="p">)</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="o">-</span><span class="mi">128</span><span class="p">,</span> <span class="mi">127</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">int8</span><span class="p">)</span>
|
||
|
||
<span class="c1"># 3. Stay in INT8 for next layer (no dequantization unless necessary)</span>
|
||
<span class="k">return</span> <span class="n">output_int8</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>Key Differences:</strong></p>
|
||
<ul class="simple">
|
||
<li><p><strong>Your implementation</strong>: INT8 storage → dequantize → FP32 compute → FP32 output (educational, no speedup)</p></li>
|
||
<li><p><strong>Production</strong>: INT8 → INT8 throughout, specialized hardware (2-10× speedup)</p></li>
|
||
</ul>
|
||
<p><strong>Memory Savings (Real):</strong> 4× reduction from storing INT8 instead of FP32
|
||
<strong>Speed Improvement (Your Code):</strong> None (~1×; the math still runs in FP32 and Python overhead dominates)
|
||
<strong>Speed Improvement (Production):</strong> 2-10× (hardware acceleration, kernel fusion)</p>
|
||
</section>
|
||
<section id="model-level-quantization">
|
||
<h3>Model-Level Quantization<a class="headerlink" href="#model-level-quantization" title="Link to this heading">#</a></h3>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">quantize_model</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">calibration_data</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||
<span class="w"> </span><span class="sd">"""</span>
|
||
<span class="sd"> Quantize all Linear layers in model.</span>
|
||
|
||
<span class="sd"> Args:</span>
|
||
<span class="sd"> model: Neural network with Linear layers</span>
|
||
<span class="sd"> calibration_data: Representative samples for activation calibration</span>
|
||
|
||
<span class="sd"> Returns:</span>
|
||
<span class="sd"> quantized_layers: List of layers in which every Linear is replaced by a</span>
|
||
<span class="sd"> QuantizedLinear (calibrated when calibration_data is given); other layers are kept as-is</span>
|
||
<span class="sd"> """</span>
|
||
<span class="n">quantized_layers</span> <span class="o">=</span> <span class="p">[]</span>
|
||
<span class="k">for</span> <span class="n">layer</span> <span class="ow">in</span> <span class="n">model</span><span class="o">.</span><span class="n">layers</span><span class="p">:</span>
|
||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">Linear</span><span class="p">):</span>
|
||
<span class="n">q_layer</span> <span class="o">=</span> <span class="n">QuantizedLinear</span><span class="p">(</span><span class="n">layer</span><span class="p">)</span>
|
||
<span class="k">if</span> <span class="n">calibration_data</span><span class="p">:</span>
|
||
<span class="n">q_layer</span><span class="o">.</span><span class="n">calibrate</span><span class="p">(</span><span class="n">calibration_data</span><span class="p">)</span> <span class="c1"># Find optimal scales</span>
|
||
<span class="n">quantized_layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">q_layer</span><span class="p">)</span>
|
||
<span class="k">else</span><span class="p">:</span>
|
||
<span class="n">quantized_layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">layer</span><span class="p">)</span> <span class="c1"># Keep ReLU, Softmax in FP32</span>
|
||
|
||
<span class="k">return</span> <span class="n">quantized_layers</span>
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>Calibration in Practice:</strong></p>
|
||
<ol class="arabic simple">
|
||
<li><p>Run 100-1000 samples through original FP32 model</p></li>
|
||
<li><p>Record min/max activations for each layer</p></li>
|
||
<li><p>Compute percentile-clipped scales</p></li>
|
||
<li><p>Quantize weights with calibrated parameters</p></li>
|
||
<li><p>Test accuracy on validation set</p></li>
|
||
</ol>
|
||
</section>
|
||
</section>
|
||
<section id="getting-started">
|
||
<h2>Getting Started<a class="headerlink" href="#getting-started" title="Link to this heading">#</a></h2>
|
||
<section id="prerequisites">
|
||
<h3>Prerequisites<a class="headerlink" href="#prerequisites" title="Link to this heading">#</a></h3>
|
||
<p>Ensure you’ve completed profiling fundamentals:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># Activate TinyTorch environment</span>
|
||
<span class="nb">source</span><span class="w"> </span>scripts/activate-tinytorch
|
||
|
||
<span class="c1"># Verify prerequisite modules</span>
|
||
tito<span class="w"> </span><span class="nb">test</span><span class="w"> </span>--module<span class="w"> </span>profiling
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>Required Understanding:</strong></p>
|
||
<ul class="simple">
|
||
<li><p>Memory profiling (Module 14): Measuring memory consumption</p></li>
|
||
<li><p>Tensor operations (Module 01): Understanding FP32 representation</p></li>
|
||
<li><p>Linear layers (Module 03): Matrix multiplication mechanics</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="development-workflow">
|
||
<h3>Development Workflow<a class="headerlink" href="#development-workflow" title="Link to this heading">#</a></h3>
|
||
<ol class="arabic simple">
|
||
<li><p><strong>Open the development file</strong>: <code class="docutils literal notranslate"><span class="pre">modules/15_quantization/quantization_dev.py</span></code></p></li>
|
||
<li><p><strong>Implement quantize_int8()</strong>: FP32 → INT8 conversion with scale/zero-point calculation</p></li>
|
||
<li><p><strong>Implement dequantize_int8()</strong>: INT8 → FP32 restoration</p></li>
|
||
<li><p><strong>Build QuantizedLinear</strong>: Replace Linear layers with quantized versions</p></li>
|
||
<li><p><strong>Add calibration logic</strong>: Percentile-based scale selection</p></li>
|
||
<li><p><strong>Implement quantize_model()</strong>: Convert entire networks to quantized form</p></li>
|
||
<li><p><strong>Export and verify</strong>: <code class="docutils literal notranslate"><span class="pre">tito</span> <span class="pre">module</span> <span class="pre">complete</span> <span class="pre">15</span> <span class="pre">&&</span> <span class="pre">tito</span> <span class="pre">test</span> <span class="pre">--module</span> <span class="pre">quantization</span></code></p></li>
|
||
</ol>
|
||
</section>
|
||
</section>
|
||
<section id="testing">
|
||
<h2>Testing<a class="headerlink" href="#testing" title="Link to this heading">#</a></h2>
|
||
<section id="comprehensive-test-suite">
|
||
<h3>Comprehensive Test Suite<a class="headerlink" href="#comprehensive-test-suite" title="Link to this heading">#</a></h3>
|
||
<p>Run the full test suite to verify quantization functionality:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># TinyTorch CLI (recommended)</span>
|
||
tito<span class="w"> </span><span class="nb">test</span><span class="w"> </span>--module<span class="w"> </span>quantization
|
||
|
||
<span class="c1"># Direct pytest execution</span>
|
||
python<span class="w"> </span>-m<span class="w"> </span>pytest<span class="w"> </span>tests/<span class="w"> </span>-k<span class="w"> </span>quantization<span class="w"> </span>-v
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="test-coverage-areas">
|
||
<h3>Test Coverage Areas<a class="headerlink" href="#test-coverage-areas" title="Link to this heading">#</a></h3>
|
||
<ul class="simple">
|
||
<li><p>✅ <strong>Quantization Correctness</strong>: FP32 → INT8 → FP32 roundtrip error bounds (&lt; 0.5% mean error)</p></li>
|
||
<li><p>✅ <strong>Memory Reduction</strong>: Verify 4× reduction in model size (weights + biases)</p></li>
|
||
<li><p>✅ <strong>Symmetric vs Asymmetric</strong>: Both schemes produce valid INT8 in [-128, 127]</p></li>
|
||
<li><p>✅ <strong>Calibration Impact</strong>: Percentile clipping reduces quantization error vs naive min/max</p></li>
|
||
<li><p>✅ <strong>QuantizedLinear Equivalence</strong>: Output matches FP32 Linear within tolerance (&lt; 1% difference)</p></li>
|
||
<li><p>✅ <strong>Model-Level Quantization</strong>: Full network quantization preserves accuracy (&lt; 2% degradation)</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="inline-testing-quantization-analysis">
|
||
<h3>Inline Testing & Quantization Analysis<a class="headerlink" href="#inline-testing-quantization-analysis" title="Link to this heading">#</a></h3>
|
||
<p>The module includes comprehensive validation with real-time feedback:</p>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Example inline test output</span>
|
||
<span class="err">🔬</span> <span class="n">Unit</span> <span class="n">Test</span><span class="p">:</span> <span class="n">quantize_int8</span><span class="p">()</span><span class="o">...</span>
|
||
<span class="err">✅</span> <span class="n">Symmetric</span> <span class="n">quantization</span><span class="p">:</span> <span class="nb">range</span> <span class="p">[</span><span class="o">-</span><span class="mi">128</span><span class="p">,</span> <span class="mi">127</span><span class="p">]</span> <span class="err">✓</span>
|
||
<span class="err">✅</span> <span class="n">Scale</span> <span class="n">calculation</span><span class="p">:</span> <span class="n">max_val</span> <span class="o">/</span> <span class="mi">127</span> <span class="o">=</span> <span class="mf">0.0234</span> <span class="err">✓</span>
|
||
<span class="err">✅</span> <span class="n">Roundtrip</span> <span class="n">error</span><span class="p">:</span> <span class="mf">0.31</span><span class="o">%</span> <span class="n">mean</span> <span class="n">error</span> <span class="err">✓</span>
|
||
<span class="err">📈</span> <span class="n">Progress</span><span class="p">:</span> <span class="n">quantize_int8</span><span class="p">()</span> <span class="err">✓</span>
|
||
|
||
<span class="err">🔬</span> <span class="n">Unit</span> <span class="n">Test</span><span class="p">:</span> <span class="n">QuantizedLinear</span><span class="o">...</span>
|
||
<span class="err">✅</span> <span class="n">Memory</span> <span class="n">reduction</span><span class="p">:</span> <span class="mi">145</span><span class="n">KB</span> <span class="err">→</span> <span class="mi">36</span><span class="n">KB</span> <span class="p">(</span><span class="mf">4.0</span><span class="err">×</span><span class="p">)</span> <span class="err">✓</span>
|
||
<span class="err">✅</span> <span class="n">Output</span> <span class="n">equivalence</span><span class="p">:</span> <span class="mf">0.43</span><span class="o">%</span> <span class="nb">max</span> <span class="n">difference</span> <span class="n">vs</span> <span class="n">FP32</span> <span class="err">✓</span>
|
||
<span class="err">📈</span> <span class="n">Progress</span><span class="p">:</span> <span class="n">QuantizedLinear</span> <span class="err">✓</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="manual-testing-examples">
|
||
<h3>Manual Testing Examples<a class="headerlink" href="#manual-testing-examples" title="Link to this heading">#</a></h3>
|
||
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">quantization_dev</span><span class="w"> </span><span class="kn">import</span> <span class="n">quantize_int8</span><span class="p">,</span> <span class="n">dequantize_int8</span><span class="p">,</span> <span class="n">QuantizedLinear</span>
|
||
<span class="kn">from</span><span class="w"> </span><span class="nn">tinytorch.nn</span><span class="w"> </span><span class="kn">import</span> <span class="n">Linear</span>
|
||
|
||
<span class="c1"># Test quantization on random tensor</span>
|
||
<span class="n">tensor</span> <span class="o">=</span> <span class="n">Tensor</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">float32</span><span class="p">))</span>
|
||
<span class="n">q_tensor</span><span class="p">,</span> <span class="n">scale</span><span class="p">,</span> <span class="n">zero_point</span> <span class="o">=</span> <span class="n">quantize_int8</span><span class="p">(</span><span class="n">tensor</span><span class="p">)</span>
|
||
|
||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Original range: [</span><span class="si">{</span><span class="n">tensor</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">min</span><span class="p">()</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2">, </span><span class="si">{</span><span class="n">tensor</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">max</span><span class="p">()</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2">]"</span><span class="p">)</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Quantized range: [</span><span class="si">{</span><span class="n">q_tensor</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">min</span><span class="p">()</span><span class="si">}</span><span class="s2">, </span><span class="si">{</span><span class="n">q_tensor</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">max</span><span class="p">()</span><span class="si">}</span><span class="s2">]"</span><span class="p">)</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Scale: </span><span class="si">{</span><span class="n">scale</span><span class="si">:</span><span class="s2">.6f</span><span class="si">}</span><span class="s2">, Zero-point: </span><span class="si">{</span><span class="n">zero_point</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Dequantize and measure error</span>
|
||
<span class="n">restored</span> <span class="o">=</span> <span class="n">dequantize_int8</span><span class="p">(</span><span class="n">q_tensor</span><span class="p">,</span> <span class="n">scale</span><span class="p">,</span> <span class="n">zero_point</span><span class="p">)</span>
|
||
<span class="n">error</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">tensor</span><span class="o">.</span><span class="n">data</span> <span class="o">-</span> <span class="n">restored</span><span class="o">.</span><span class="n">data</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Roundtrip error: </span><span class="si">{</span><span class="n">error</span><span class="si">:</span><span class="s2">.4f</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">error</span><span class="o">/</span><span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">tensor</span><span class="o">.</span><span class="n">data</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">*</span><span class="mi">100</span><span class="si">:</span><span class="s2">.2f</span><span class="si">}</span><span class="s2">%)"</span><span class="p">)</span>
|
||
|
||
<span class="c1"># Quantize a Linear layer</span>
|
||
<span class="n">linear</span> <span class="o">=</span> <span class="n">Linear</span><span class="p">(</span><span class="mi">128</span><span class="p">,</span> <span class="mi">64</span><span class="p">)</span>
|
||
<span class="n">q_linear</span> <span class="o">=</span> <span class="n">QuantizedLinear</span><span class="p">(</span><span class="n">linear</span><span class="p">)</span>
|
||
|
||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"</span><span class="se">\n</span><span class="s2">Original weights: </span><span class="si">{</span><span class="n">linear</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">nbytes</span><span class="si">}</span><span class="s2"> bytes"</span><span class="p">)</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Quantized weights: </span><span class="si">{</span><span class="n">q_linear</span><span class="o">.</span><span class="n">weights_int8</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">nbytes</span><span class="si">}</span><span class="s2"> bytes"</span><span class="p">)</span>
|
||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Reduction: </span><span class="si">{</span><span class="n">linear</span><span class="o">.</span><span class="n">weight</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">nbytes</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="n">q_linear</span><span class="o">.</span><span class="n">weights_int8</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">nbytes</span><span class="si">:</span><span class="s2">.1f</span><span class="si">}</span><span class="s2">×"</span><span class="p">)</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="systems-thinking-questions">
|
||
<h2>Systems Thinking Questions<a class="headerlink" href="#systems-thinking-questions" title="Link to this heading">#</a></h2>
|
||
<section id="real-world-applications">
|
||
<h3>Real-World Applications<a class="headerlink" href="#real-world-applications" title="Link to this heading">#</a></h3>
|
||
<ul class="simple">
|
||
<li><p><strong>Mobile ML Deployment</strong>: TensorFlow Lite converts all models to INT8 for Android/iOS. Without quantization, models exceed app size limits (100-200MB) and drain battery 4× faster. Google Photos, Translate, Keyboard all run quantized models on-device.</p></li>
|
||
<li><p><strong>Edge AI Devices</strong>: Google Edge TPU (Coral), NVIDIA Jetson, Intel Neural Compute Stick require INT8 models. Hardware is designed exclusively for quantized operations - FP32 isn’t supported or is 10× slower.</p></li>
|
||
<li><p><strong>Cloud Inference Optimization</strong>: AWS Inferentia, Google Cloud TPU, and Azure’s AI accelerators all serve quantized models. INT8 reduces memory bandwidth demand (the usual bottleneck for inference) and increases throughput by 2-4×. At scale (millions of requests/day), this saves millions in infrastructure costs.</p></li>
|
||
<li><p><strong>Large Language Models</strong>: LLaMA-65B is 130GB in FP16 and doesn’t fit on a single 80GB A100 GPU. INT8 quantization shrinks it to 65GB, which enables single-GPU serving. GPTQ pushes to 4-bit (33GB) with &lt; 1% perplexity increase. Quantization is how enthusiasts run 70B models on consumer GPUs.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="quantization-mathematics">
|
||
<h3>Quantization Mathematics<a class="headerlink" href="#quantization-mathematics" title="Link to this heading">#</a></h3>
|
||
<ul class="simple">
|
||
<li><p><strong>Why INT8 vs INT4 or INT16?</strong> INT8 is the sweet spot: 4× memory reduction with &lt; 1% accuracy loss. INT4 gives 8× reduction but 2-5% accuracy loss (harder to deploy). INT16 gives only 2× reduction (not worth the complexity). Hardware acceleration (VNNI, NEON, Tensor Cores) standardized on INT8.</p></li>
|
||
<li><p><strong>Symmetric vs Asymmetric Trade-offs</strong>: Symmetric is simpler (no zero-point) but wastes range for skewed data. ReLU activations are [0, max] - symmetric centers around 0, wasting negative range. Asymmetric uses full INT8 range but costs extra zero-point storage and computation.</p></li>
|
||
<li><p><strong>Calibration Data Requirements</strong>: Theory: more data → better statistics. Practice: diminishing returns after 500-1000 samples. Percentile estimates stabilize quickly. Critical requirement: calibration data MUST match deployment distribution. If calibration is ImageNet but deployment is medical images, quantization fails catastrophically.</p></li>
|
||
<li><p><strong>Per-Channel Justification</strong>: Conv2D with 64 output channels: per-channel stores 64 scales + 64 zero-points = 512 bytes. Total weights: 3×3×64×64 FP32 = 147KB. Overhead: 0.35%. Accuracy improvement: 0.5-1.5%. Clear win - explains why production frameworks default to per-channel.</p></li>
|
||
</ul>
|
||
</section>
|
||
<section id="production-deployment-characteristics">
|
||
<h3>Production Deployment Characteristics<a class="headerlink" href="#production-deployment-characteristics" title="Link to this heading">#</a></h3>
|
||
<ul class="simple">
|
||
<li><p><strong>Speed Reality Check</strong>: INT8 matmul is theoretically 4× faster (4× less memory bandwidth). In practice: 2-3× on CPU (quantize/dequantize overhead), 4-10× on specialized hardware (Edge TPU, Neural Engine designed for pure INT8 graphs). Your Python implementation is not faster at all (simulation overhead outweighs the bandwidth savings).</p></li>
|
||
<li><p><strong>When Quantization is Mandatory</strong>: Mobile deployment (app size limits, battery constraints, Neural Engine acceleration), Edge devices (limited memory/compute), Cloud serving at scale (cost optimization). Not negotiable - models either quantize or don’t ship.</p></li>
|
||
<li><p><strong>When to Avoid Quantization</strong>: Accuracy-critical applications where 1% matters (medical diagnosis, autonomous vehicles), early research iteration (quantization adds complexity), models that are already tiny (&lt; 10MB - quantization overhead not worth it), and cloud serving with abundant resources (FP32 throughput sufficient).</p></li>
|
||
<li><p><strong>Quantization-Aware Training vs Post-Training</strong>: PTQ (Post-Training Quantization) is fast (minutes) but loses 1-2% accuracy. QAT (Quantization-Aware Training) requires retraining (days/weeks) but loses &lt; 0.5%. Choose PTQ for rapid iteration, QAT for production deployment. If using pretrained models you don’t own (BERT, ResNet), PTQ is the only option.</p></li>
|
||
</ul>
|
||
</section>
|
||
</section>
|
||
<section id="ready-to-build">
|
||
<h2>Ready to Build?<a class="headerlink" href="#ready-to-build" title="Link to this heading">#</a></h2>
|
||
<p>You’re about to implement the precision reduction mathematics that make mobile ML deployment possible. Quantization is the difference between a model that exists in research and a model that ships in apps used by billions.</p>
|
||
<p>This module teaches honest quantization: you’ll implement the math correctly, achieve 4× memory reduction, and understand precisely why your Python code isn’t faster (hardware acceleration requires specialized silicon + compiled kernels). This clarity prepares you for production deployment where TensorFlow Lite, PyTorch Mobile, and ONNX Runtime apply your quantization mathematics with real INT8 hardware operations.</p>
|
||
<p>Understanding quantization from first principles - implementing the scale/zero-point calculations yourself, calibrating with real data, measuring accuracy-efficiency trade-offs - gives you deep insight into the constraints that define production ML systems.</p>
|
||
<p>Choose your preferred way to engage with this module:</p>
|
||
<div class="sd-container-fluid sd-sphinx-override sd-mb-4 docutils">
|
||
<div class="sd-row sd-row-cols-1 sd-row-cols-xs-1 sd-row-cols-sm-2 sd-row-cols-md-3 sd-row-cols-lg-3 docutils">
|
||
<div class="sd-col sd-d-flex-row docutils">
|
||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||
<div class="sd-card-body docutils">
|
||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||
Launch Binder</div>
|
||
<p class="sd-card-text">Run this module interactively in your browser. No installation required.</p>
|
||
</div>
|
||
<a class="sd-stretched-link sd-hide-link-text reference external" href="https://mybinder.org/v2/gh/mlsysbook/TinyTorch/main?filepath=modules/15_quantization/quantization_dev.ipynb"><span>https://mybinder.org/v2/gh/mlsysbook/TinyTorch/main?filepath=modules/15_quantization/quantization_dev.ipynb</span></a></div>
|
||
</div>
|
||
<div class="sd-col sd-d-flex-row docutils">
|
||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||
<div class="sd-card-body docutils">
|
||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||
Open in Colab</div>
|
||
<p class="sd-card-text">Use Google Colab for GPU access and cloud compute power.</p>
|
||
</div>
|
||
<a class="sd-stretched-link sd-hide-link-text reference external" href="https://colab.research.google.com/github/mlsysbook/TinyTorch/blob/main/modules/15_quantization/quantization_dev.ipynb"><span>https://colab.research.google.com/github/mlsysbook/TinyTorch/blob/main/modules/15_quantization/quantization_dev.ipynb</span></a></div>
|
||
</div>
|
||
<div class="sd-col sd-d-flex-row docutils">
|
||
<div class="sd-card sd-sphinx-override sd-w-100 sd-shadow-sm sd-card-hover docutils">
|
||
<div class="sd-card-body docutils">
|
||
<div class="sd-card-title sd-font-weight-bold docutils">
|
||
View Source</div>
|
||
<p class="sd-card-text">Browse the Python source code and understand the implementation.</p>
|
||
</div>
|
||
<a class="sd-stretched-link sd-hide-link-text reference external" href="https://github.com/mlsysbook/TinyTorch/blob/main/modules/15_quantization/quantization_dev.py"><span>https://github.com/mlsysbook/TinyTorch/blob/main/modules/15_quantization/quantization_dev.py</span></a></div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<div class="tip admonition">
|
||
<p class="admonition-title">Save Your Progress</p>
|
||
<p>Binder sessions are temporary. Download your completed notebook when done, or switch to local development for persistent work.</p>
|
||
</div>
|
||
<hr class="docutils" />
|
||
<div class="prev-next-area">
|
||
<a class="left-prev" href="../modules/14_profiling/ABOUT.html" title="previous page">← Module 14: Profiling</a>
|
||
<a class="right-next" href="../modules/16_compression/ABOUT.html" title="next page">Module 16: Compression →</a>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
|
||
<script type="text/x-thebe-config">
|
||
{
|
||
requestKernel: true,
|
||
binderOptions: {
|
||
repo: "binder-examples/jupyter-stacks-datascience",
|
||
ref: "master",
|
||
},
|
||
codeMirrorConfig: {
|
||
theme: "abcdef",
|
||
mode: "python"
|
||
},
|
||
kernelOptions: {
|
||
name: "python3",
|
||
path: "./modules"
|
||
},
|
||
predefinedOutput: true
|
||
}
|
||
</script>
|
||
<script>kernelName = 'python3'</script>
|
||
|
||
</article>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<footer class="prev-next-footer d-print-none">
|
||
|
||
<div class="prev-next-area">
|
||
<a class="left-prev"
|
||
href="14_profiling_ABOUT.html"
|
||
title="previous page">
|
||
<i class="fa-solid fa-angle-left"></i>
|
||
<div class="prev-next-info">
|
||
<p class="prev-next-subtitle">previous</p>
|
||
<p class="prev-next-title">14. Profiling - Performance Measurement for ML Systems</p>
|
||
</div>
|
||
</a>
|
||
<a class="right-next"
|
||
href="16_compression_ABOUT.html"
|
||
title="next page">
|
||
<div class="prev-next-info">
|
||
<p class="prev-next-subtitle">next</p>
|
||
<p class="prev-next-title">16. Compression - Pruning and Model Compression</p>
|
||
</div>
|
||
<i class="fa-solid fa-angle-right"></i>
|
||
</a>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
|
||
|
||
|
||
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
||
|
||
|
||
<div class="sidebar-secondary-item">
|
||
<div class="page-toc tocsection onthispage">
|
||
<i class="fa-solid fa-list"></i> Contents
|
||
</div>
|
||
<nav class="bd-toc-nav page-toc">
|
||
<ul class="visible nav section-nav flex-column">
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#overview">Overview</a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#learning-objectives">Learning Objectives</a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#build-use-optimize">Build → Use → Optimize</a></li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#implementation-guide">Implementation Guide</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantization-flow-fp32-int8">Quantization Flow: FP32 → INT8</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#what-youre-actually-building-educational-quantization">What You’re Actually Building (Educational Quantization)</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#core-quantization-mathematics">Core Quantization Mathematics</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#calibration-the-critical-step">Calibration - The Critical Step</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#per-tensor-vs-per-channel-quantization">Per-Tensor vs Per-Channel Quantization</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantizedlinear-quantized-neural-network-layer">QuantizedLinear - Quantized Neural Network Layer</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#model-level-quantization">Model-Level Quantization</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#getting-started">Getting Started</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#prerequisites">Prerequisites</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#development-workflow">Development Workflow</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#testing">Testing</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#comprehensive-test-suite">Comprehensive Test Suite</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#test-coverage-areas">Test Coverage Areas</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#inline-testing-quantization-analysis">Inline Testing & Quantization Analysis</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#manual-testing-examples">Manual Testing Examples</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#systems-thinking-questions">Systems Thinking Questions</a><ul class="nav section-nav flex-column">
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#real-world-applications">Real-World Applications</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#quantization-mathematics">Quantization Mathematics</a></li>
|
||
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#production-deployment-characteristics">Production Deployment Characteristics</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#ready-to-build">Ready to Build?</a></li>
|
||
</ul>
|
||
</nav></div>
|
||
|
||
</div></div>
|
||
|
||
|
||
</div>
|
||
<footer class="bd-footer-content">
|
||
|
||
<div class="bd-footer-content__inner container">
|
||
|
||
<div class="footer-item">
|
||
|
||
<p class="component-author">
|
||
By Prof. Vijay Janapa Reddi (Harvard University)
|
||
</p>
|
||
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
|
||
<p class="copyright">
|
||
|
||
© Copyright 2025.
|
||
<br/>
|
||
|
||
</p>
|
||
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
</div>
|
||
|
||
<div class="footer-item">
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</footer>
|
||
|
||
|
||
</main>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
||
<script src="../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b"></script>
|
||
|
||
<footer class="bd-footer">
|
||
</footer>
|
||
</body>
|
||
</html> |