mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-03-11 17:49:25 -05:00
Updated figures in chapter 10: model_compression
This commit is contained in:
@@ -2595,21 +2595,130 @@ Box/.style={align=center,
|
||||
text width=32mm,
|
||||
minimum width=32mm, minimum height=10mm
|
||||
},
|
||||
Box2/.style={Box,fill=VioletL2,draw=VioletLine}
|
||||
Box2/.style={Box,fill=VioletL2,draw=VioletLine},
|
||||
Circ/.style={draw=none,fill=none,circle,minimum size=30mm},
|
||||
Arr/.style={-{Triangle[width=10pt,length=6pt]}, line width=5pt,violet!40,
|
||||
shorten <=-8pt,shorten >=-8pt},
|
||||
}
|
||||
|
||||
\node[Box](B1){Search Space \\ $\mathcal{A}$};
|
||||
\node[Box2,right=of B1](B2){Search Strategy};
|
||||
\node[Box2,right=of B2](B3){Performance\\ Estimation Strategy};
|
||||
\scoped[on background layer]
|
||||
\node[draw=BackLine,inner xsep=5mm,inner ysep=5mm,minimum height=40mm,
|
||||
yshift=6.5mm,fill=BackColor!30,fit=(B2)(B3),line width=1pt](BB1){};
|
||||
%griddot
|
||||
\tikzset{
  % Pic "griddot": a 20mm square panel crossed by three horizontal rules,
  % each carrying one coloured dot (cyan, violet, red).
  % The pic argument is a list of /channel/ keys; this pic reads
  % \scalefac, \drawcolor, \filllcolor and \Linewidth.
  pics/griddot/.style = {
    code = {
      \pgfkeys{/channel/.cd, #1}
      \begin{scope}[shift={($(0,0)+(0,0)$)},line cap=round,scale=\scalefac,
                    every node/.append style={transform shape}]
        % Panel background; its border anchors position the three rules.
        \node[draw=\drawcolor,line width=0.4*\Linewidth,fill=\filllcolor,rectangle,
              minimum height=20mm,minimum width=20mm](RE){};
        % Upper rule, dot placed at 66% of its length.
        \draw[draw=\drawcolor,line width=\Linewidth,shorten >=5pt,shorten <=5pt]
          (RE.150)--coordinate[pos=0.66](D1)(RE.30);
        \node[draw=\drawcolor,fill=cyan,line width=\Linewidth,circle,
              minimum size=13,inner sep=2pt] at (D1){};
        % Middle rule, dot placed at 33% of its length.
        \draw[draw=\drawcolor,line width=\Linewidth,shorten >=5pt,shorten <=5pt]
          (RE.180)--coordinate[pos=0.33](D2)(RE.0);
        \node[draw=\drawcolor,fill=violet!50,line width=\Linewidth,circle,
              minimum size=13,inner sep=2pt] at (D2){};
        % Lower rule, dot placed at 66% of its length. Uses its own coordinate
        % name (D3) instead of overwriting D1 from the upper rule.
        \draw[draw=\drawcolor,line width=\Linewidth,shorten >=5pt,shorten <=5pt]
          (RE.210)--coordinate[pos=0.66](D3)(RE.330);
        \node[draw=\drawcolor,fill=red,line width=\Linewidth,circle,
              minimum size=13,inner sep=2pt] at (D3){};
      \end{scope}
    }
  }
}
|
||||
%patharrow
|
||||
\tikzset{
  % Pic "patharrow": a 20mm square panel containing a filled circle, a
  % rotated "+" glyph and a stepped arrow that climbs through the panel.
  % The pic argument is a list of /channel/ keys; this pic reads
  % \scalefac, \drawcolor, \filllcolor and \filllcirclecolor.
  pics/patharrow/.style = {
    code = {
      \pgfkeys{/channel/.cd, #1}
      \begin{scope}[shift={($(0,0)+(0,0)$)},scale=\scalefac,
                    every node/.append style={transform shape}]
        % Panel background; its anchors position everything else.
        \node[draw=\drawcolor,line width=1.5pt,fill=\filllcolor,rectangle,
              minimum width=20mm,minimum height=20mm](RE){};
        % Circle at 75% of the way from the south-east to the north-west corner.
        % calc partway syntax is ($(A)!factor!(B)$); no parenthesis belongs
        % inside the factor.
        \node[draw=\drawcolor,fill=\filllcirclecolor,line width=3pt,circle,
              minimum size=18,inner sep=2pt]
          at ($(RE.south east)!0.75!(RE.north west)$){};
        % "+" glyph rotated by 45 degrees, at 23% along the same diagonal.
        \node[font=\bfseries\fontsize{32pt}{32}\selectfont,text=\drawcolor,
              rotate=45,line width=3pt]
          at ($(RE.south east)!0.23!(RE.north west)$){+};
        % Stepped arrow: up, quarter-arc to the right, short horizontal run,
        % quarter-arc upwards, then the final vertical segment with the tip.
        \draw[draw=black,
              -{Latex[length=6mm,width=6mm,round,open,fill=\filllcirclecolor,line width=2.5pt]},
              line width=3pt,shorten <=4pt]
          (RE.240)--++(0,0.58)
          arc[start angle=180, end angle=90, radius=3mm]
          --++(0.43,0)
          arc[start angle=270, end angle=360, radius=3mm]
          --++(0,0.7);
      \end{scope}
    }
  }
}
|
||||
%graph
|
||||
\tikzset{
  % Pic "graph": two axis lines, four bars of increasing height and a
  % polyline with an arrow tip stepping across the bar tops.
  % The pic argument is a list of /channel/ keys; this pic reads
  % \scalefac, \drawcolor, \filllcolor and \Linewidth.
  pics/graph/.style = {
    code = {
      \pgfkeys{/channel/.cd, #1}
      \begin{scope}[local bounding box=GRAPH,scale=\scalefac,
                    every node/.append style={transform shape}]
        % Horizontal and vertical axis lines sharing the corner (-0.20,0).
        \draw[draw=\drawcolor,line width=2*\Linewidth] (-0.20,0)--(2,0);
        \draw[draw=\drawcolor,line width=2*\Linewidth] (-0.20,0)--(-0.20,2);
        % One bar per (x offset / height in mm) pair; bars are named COM1..COM4.
        \foreach \xpos/\barh [count=\idx] in {0/5,0.5/8,1/11,1.5/15}{
          \node[anchor=south west,inner sep=0pt,
                minimum width=4mm,minimum height=\barh mm,
                line width=\Linewidth,draw=\filllcolor,fill=\filllcolor!20]
            (COM\idx) at (\xpos,0.2){};
        }
        % Trend line hopping from bar to bar, ending in an arrow tip.
        \draw[draw=\filllcolor!70!black!80,line width=2*\Linewidth,->,>=Latex]
          ($(COM1.north west)+(0,0.15)$)
          --($(COM2.north west)+(0,0.5)$)--++(0,-0.35)
          --($(COM3.north west)+(0,0.5)$)--++(0,-0.35)
          --($(COM4.north west)+(0,0.4)$);
      \end{scope}
    }
  }
}
|
||||
% Declare the /channel/ key family consumed by the pics in this figure.
% Every key stores its value in a macro of the same name; each declaration
% is immediately followed by its default value.
\pgfkeys{
  /channel/.cd,
  % Colours
  filllcolor/.store in=\filllcolor,             filllcolor=BrownLine,
  filllcirclecolor/.store in=\filllcirclecolor, filllcirclecolor=cyan,
  drawcolor/.store in=\drawcolor,               drawcolor=black,
  drawcircle/.store in=\drawcircle,             drawcircle=violet,
  % Scaling and stroke width
  scalefac/.store in=\scalefac,                 scalefac=1,
  Linewidth/.store in=\Linewidth,               Linewidth=0.5pt,
  % Remaining parameters (not read by the pics visible in this figure chunk)
  Dual/.store in=\Dual,                         Dual=adual,
  Smile/.store in=\Smile,                       Smile=smile,
  Level/.store in=\Level,                       Level=0.52,
  Depth/.store in=\Depth,                       Depth=1.3,
  Height/.store in=\Height,                     Height=0.8,
  Width/.store in=\Width,                       Width=1.1,
  % Name suffix handed to a pic instance
  picname/.store in=\picname,                   picname=C
}
|
||||
|
||||
\node[Circ](C1){};
|
||||
\pic[shift={(0,0)}] at (C1){patharrow={scalefac=1.0,picname=1,drawcolor=orange,
|
||||
filllcolor=orange!05!,Linewidth=0.7pt, filllcirclecolor=green!60}};
|
||||
\node[below =-7pt of C1](B1){Search Space $\mathcal{A}$};
|
||||
|
||||
\node[Circ,right=3.5 of C1](C2){};
|
||||
\pic[shift={(0,0)}] at (C2){griddot={scalefac=1,picname=1,drawcolor=BlueD,
|
||||
filllcolor=green!10!,Linewidth=2.75pt, filllcirclecolor=cyan!20}};
|
||||
\node[below =-7pt of C2](B2){Search Strategy};
|
||||
|
||||
\node[Circ,right=3.5 of C2](C3){};
|
||||
\pic[shift={(-0.80,-0.9)}] at (C3){graph={scalefac=1,picname=1,filllcolor=BlueLine, Linewidth=1.0pt}};
|
||||
\node[below =-7pt of C3,align=center](B3){Performance\\ Estimation Strategy};
|
||||
\scoped[on background layer]
|
||||
\node[draw=BackLine,inner xsep=5mm,inner ysep=8mm,minimum height=40mm,
|
||||
yshift=6.5mm,fill=BackColor!10,fit=(C2)(C3)(B3),line width=1pt](BB1){};
|
||||
\node[below=4pt of BB1.north,inner sep=0pt,
|
||||
anchor=north,align=center]{One-shot approach:\\
|
||||
learning model architecture parameters and weights together};
|
||||
\draw[Line,-latex](B1)--(B2);
|
||||
\draw[Line,-latex](B2.8)--node[above,align=center]{Architecture \\ $A\in\mathcal{A}$}(B3.172);
|
||||
\draw[Line,latex-](B2.352)--node[below,align=center]{Performance\\ estimate of $A$}(B3.188);
|
||||
\draw[Arr](C1)--(C2);
|
||||
\draw[Arr](C2.15)--node[text=black,above=2pt,align=center]{Architecture $A\in\mathcal{A}$}(C3.165);
|
||||
\draw[Arr](C3.195)--node[text=black,below,align=center]{Performance\\ estimate of $A$}(C2.345);
|
||||
\end{tikzpicture}
|
||||
```
|
||||
:::
|
||||
@@ -3112,7 +3221,117 @@ Reducing numerical precision introduces trade-offs, however. Lower-precision for
|
||||
|
||||
To appreciate how precision loss manifests in practice, examine the representative quantization error distribution in @fig-quantization: the bell-shaped curve centered near zero shows that most values quantize with minimal error, but the tails reveal outlier errors that can accumulate and influence model accuracy. Understanding this noise is essential, but practitioners ultimately care about end-to-end speedup, and the magnitude of *the quantization speedup* depends on whether a workload is compute-bound or memory-bound.
|
||||
|
||||
{#fig-quantization width=80% fig-alt="Histogram showing quantization error distribution weighted by probability density. Bell-shaped curve centered near zero with tails extending to positive and negative errors, illustrating typical quantization noise pattern."}
|
||||
::: {#fig-quantization fig-env="figure" fig-pos="htb" fig-cap="**Quantization Error Distribution**: Histogram of quantization error weighted by probability density $p(x)$, showing a bell-shaped curve centered near zero with tails that introduce quantization noise affecting model accuracy." fig-alt="Histogram showing quantization error distribution weighted by probability density. Bell-shaped curve centered near zero with tails extending to positive and negative errors, illustrating typical quantization noise pattern."}
|
||||
```{.tikz}
|
||||
\begin{tikzpicture}[font=\small\usefont{T1}{phv}{m}{n}]
% Illustrative quantization-error figure: a bell-shaped histogram (approximate
% values), red overlays on the outermost bins, a Gaussian-shaped reference
% curve and "Tail errors" call-outs.
\definecolor{barfill}{cmyk}{0.7, 0.4, 0, 0}
\begin{axis}[
  width=16cm,
  height=9cm,
  xmin=-3, xmax=3,
  ymin=0, ymax=1.06,
  axis lines=left,
  xtick={-3,-2,-1,0,1,2,3},
  xticklabels={$-$3,$-$2,$-$1,0,+1,+2,+3},
  ytick={0,0.25,0.5,0.75,1.0},
  ymajorgrids=true,
  grid style={draw=black!10},
  tick align=outside,
  tick style={black},
  tick label style={/pgf/number format/assume math mode=true},
  xlabel={\bfseries Quantization Error},
  ylabel={\bfseries\usefont{T1}{phv}{m}{n} $p(x)\times$ Error},
  title={\bfseries Quantization Error Distribution},
  title style={yshift=1pt},
  clip=false
]

% ------------------------------------------------------------
% 1) Histogram bars (approximate values, roughly symmetric)
%    x = bin center, y = bar height
% ------------------------------------------------------------
\addplot[ybar, line width=0.7pt, bar width=12.5pt, draw=BlueD, fill=barfill!80]
table[row sep=\\]{
x y\\
-2.2 0.10\\
-2.0 0.16\\
-1.8 0.23\\
-1.6 0.31\\
-1.4 0.43\\
-1.2 0.56\\
-1.0 0.7\\
-0.8 0.80\\
-0.6 0.9\\
-0.4 0.97\\
-0.2 0.99\\
0.0 1.0\\
0.2 0.99\\
0.4 0.95\\
0.6 0.87\\
0.8 0.75\\
1.0 0.64\\
1.2 0.52\\
1.4 0.40\\
1.6 0.3\\
1.8 0.22\\
2.0 0.15\\
2.2 0.10\\
};

% ------------------------------------------------------------
% 2) Red tail bars
%    left tail: x <= -2.4, right tail: x >= 2.4
% ------------------------------------------------------------
\addplot[ybar, bar width=12.5pt, draw=red!70!black, fill=red!35, fill opacity=0.65]
table[row sep=\\]{
x y\\
-2.8 0.03\\
-2.6 0.04\\
-2.4 0.06\\
};

\addplot[ybar, bar width=12.5pt, draw=red!70!black, fill=red!35, fill opacity=0.65]
table[row sep=\\]{
x y\\
2.4 0.06\\
2.6 0.04\\
2.8 0.03\\
};

% ------------------------------------------------------------
% 3) Smooth Gaussian-shaped curve in dark red, peak value 1 at x = 0
% ------------------------------------------------------------
\addplot[line width=2pt, red!70!black, smooth, domain=-3:3, samples=200]
{exp(-0.5*x^2)};

% ------------------------------------------------------------
% 4) Dashed center line at x = 0. It is drawn in white on top of the
%    central bar; the previously listed black!35 was overridden by the
%    later "white" and has been dropped.
% ------------------------------------------------------------
\addplot[dashed, white, line width=1pt]
coordinates {(0.0,0) (0.0,1.0)};

% ------------------------------------------------------------
% 5) "Tail errors" call-outs with dashed arrows pointing at the tail bars
% ------------------------------------------------------------
\draw[red!70!black, dashed, <-, >=Latex, line width=1pt]
(axis cs:-2.55,0.08) -- (axis cs:-2.4,0.27)
node[above=40pt,anchor=north,align=center,text=red!70!black, font=\usefont{T1}{phv}{m}{n}\bfseries]{Tail errors};
%
\draw[red!70!black, dashed, <-, >=Latex, line width=1pt]
(axis cs:2.55,0.08) -- (axis cs:2.35,0.27)
node[above=40pt,anchor=north,align=center,text=red!70!black, font=\usefont{T1}{phv}{m}{n}\bfseries]{Tail errors}
node[above=27pt,anchor=north,align=center,text=black!50, font=\footnotesize\usefont{T1}{phv}{m}{n}\itshape]{affect model\\
accuracy};

% Subtitle just above the plot area.
\node[font=\itshape\small\usefont{T1}{phv}{m}{it},text=black!70] at (rel axis cs:0.5,1.01)
{Error weighted by probability density $p(x)$};

% Footer note below the x-axis label (negative relative y puts it under the axis).
\node[font=\itshape\footnotesize\usefont{T1}{phv}{m}{it},text=black!70] at (rel axis cs:0.5,-0.21)
{Most quantization errors are near zero; tail errors introduce cumulative noise};
\end{axis}
\end{tikzpicture}
|
||||
```
|
||||
:::
|
||||
|
||||
|
||||
::: {.callout-notebook title="The Quantization Speedup (Compute-Bound)"}
|
||||
**Problem**: You have a compute-bound matrix multiplication (e.g., in a Transformer MLP block). You switch from FP16 to INT8. What is the expected speedup?
|
||||
@@ -3536,7 +3755,97 @@ For example, consider quantizing activations that originally range between -6 an
|
||||
|
||||
Common calibration methods include **Max**\index{Quantization!calibration!max method} (uses maximum absolute value, simple but susceptible to outliers), **Entropy**\index{Quantization!calibration!entropy method} (minimizes KL divergence between original and quantized distributions, TensorRT's default), and **Percentile**\index{Quantization!calibration!percentile method} (clips to a percentile, e.g., 99%, avoiding outlier impact). @fig-resnet-activations-histogram shows *why* outlier handling matters: ResNet50 activations exhibit long tails where outliers can skew the quantization range.
|
||||
|
||||
![**Activation Distribution**: Resnet50 layer activations exhibit a long tail, with outlier values that can lead to inefficient precision use if not handled carefully. Source: [@wu2020integer].](images/svg/activation_histogram.svg){#fig-resnet-activations-histogram width=85% fig-alt="Histogram of ResNet50 activation values showing right-skewed distribution. Most values cluster near zero with long tail extending to outliers around 2.1, demonstrating challenge for quantization range selection."}
|
||||
::: {#fig-resnet-activations-histogram fig-env="figure" fig-pos="htb" fig-cap="**Activation Distribution**: ResNet50 layer activations exhibit a long tail, with outlier values that can lead to inefficient precision use if not handled carefully. Source: [@wu2020integer]." fig-alt="Histogram of ResNet50 activation values showing right-skewed distribution. Most values cluster near zero with long tail extending to outliers around 2.1, demonstrating challenge for quantization range selection."}
|
||||
```{.tikz}
|
||||
\begin{tikzpicture}[font=\small\usefont{T1}{phv}{m}{n}]
% Illustrative ResNet50 activation histogram: right-skewed bars (approximate
% values), red overlays on the outlier tail, a dashed line marking the
% clipping threshold and two call-outs.
\definecolor{barfill}{cmyk}{0.7, 0.4, 0, 0}
\begin{axis}[
  width=16cm,
  height=9cm,
  xmin=0, xmax=2.5,
  ymin=0, ymax=1.06,
  axis lines=left,
  xtick={0,0.5,1,1.5,2,2.5},
  xticklabels={0.0,0.5,1.0,1.5,2.0,2.5},
  ytick={0,0.25,0.5,0.75,1.0},
  ymajorgrids=true,
  grid style={draw=black!10},
  tick align=outside,
  tick style={black},
  tick label style={/pgf/number format/assume math mode=true},
  xlabel={\bfseries Activation Value},
  ylabel={\bfseries\usefont{T1}{phv}{m}{n} Frequency},
  title={\bfseries ResNet50 Layer Activation Distribution},
  title style={yshift=1pt},
  clip=false, %enlarge x limits=0.02,
]

% ------------------------------------------------------------
% 1) Histogram bars (approximate values, right-skewed)
%    x = bin center, y = bar height
% ------------------------------------------------------------
\addplot[ybar, line width=0.7pt, bar width=15pt, draw=BlueD, fill=barfill!80]
table[row sep=\\]{
x y\\
0.05 1\\
0.15 0.95\\
0.25 0.91\\
0.35 0.84\\
0.45 0.74\\
0.55 0.6\\
0.65 0.47\\
0.75 0.36\\
0.85 0.28\\
0.95 0.21\\
1.05 0.16\\
1.15 0.12\\
};

% ------------------------------------------------------------
% 2) Red outlier-tail bars (bins centered at x >= 1.25)
% ------------------------------------------------------------
\addplot[ybar, bar width=15pt, draw=red!70!black, fill=red!35, fill opacity=0.65]
table[row sep=\\]{
x y\\
1.25 0.09\\
1.35 0.06\\
1.45 0.04\\
1.55 0.035\\
1.65 0.03\\
1.75 0.025\\
1.85 0.02\\
1.95 0.015\\
2.05 0.01\\
};

% Subtitle just above the plot area.
\node[font=\itshape\small\usefont{T1}{phv}{m}{it},text=black!70] at (rel axis cs:0.5,1.01)
{Right-skewed distribution with long tail challenging quantization range selection};

% Footer note below the x-axis label (negative relative y puts it under the axis).
\node[font=\itshape\footnotesize\usefont{T1}{phv}{m}{it},text=black!70] at (rel axis cs:0.5,-0.21)
{Most values cluster near zero; long tail requires careful calibration range selection};

% ------------------------------------------------------------
% 3) Dashed green line at x = 1.1 marking the clipping threshold labelled
%    "99th percentile" below. The previously listed black!35 was overridden
%    by the later GreenD and has been dropped.
% ------------------------------------------------------------
\addplot[dashed, GreenD, line width=1pt]
coordinates {(1.1,0) (1.1,1.0)};

% ------------------------------------------------------------
% 4) Call-outs: clipping-threshold label and outlier-tail arrow
% ------------------------------------------------------------
\node[align=left,text=GreenD, font=\usefont{T1}{phv}{m}{n}\bfseries,
anchor=north west] at (axis cs:1.15,0.96){99th percentile\\
{\usefont{T1}{phv}{m}{it} clip here}};
%
\draw[red!70!black, dashed, <-, >=Latex, line width=1pt]
(axis cs:1.15,0.13) -- (axis cs:1.6,0.4)
node[right=5pt,align=left,text=red!70!black,
font=\usefont{T1}{phv}{m}{n}\bfseries]{Outlier tail\\
{\color{black!70}\footnotesize\usefont{T1}{phv}{m}{it} Skews quantization range}\\[-2pt]
{\color{black!70}\footnotesize\usefont{T1}{phv}{m}{it} if not clipped}};
\end{axis}
\end{tikzpicture}
|
||||
|
||||
```
|
||||
:::
|
||||
|
||||
Calibration ranges can be **symmetric**\index{Quantization!symmetric} (equal positive and negative scaling) or **asymmetric**\index{Quantization!asymmetric} (different scaling factors for each side, useful when distributions are skewed). The choice of method and range significantly affects quantized model accuracy.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user