mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-04 16:48:48 -05:00
docs: update site with optimization tier flow explanation
This commit is contained in:
@@ -449,12 +449,12 @@
|
||||
people: [{name: "Geoffrey Hinton", role: "Distillation"}, {name: "Lottery Ticket Hypothesis", role: "Frankle & Carbin"}]
|
||||
},
|
||||
{
|
||||
id: "17_memoization", title: "Memoization", year: "2022", val: 0.80,
|
||||
id: "17_acceleration", title: "Acceleration", year: "2016", val: 0.85,
|
||||
desc: "KV-Cache for fast inference.",
|
||||
people: [{name: "Pope et al.", role: "Efficiently Scaling Transformers"}, {name: "vLLM Team", role: "PagedAttention"}]
|
||||
},
|
||||
{
|
||||
id: "18_acceleration", title: "Acceleration", year: "2016", val: 0.85,
|
||||
id: "18_memoization", title: "Memoization", year: "2022", val: 0.80,
|
||||
desc: "Hardware optimization (TPU/GPU).",
|
||||
people: [{name: "NVIDIA", role: "CUDA/Tensor Cores"}, {name: "Google", role: "TPU Architecture"}]
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user