mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-08 18:01:20 -05:00
Per-file audit caught 14 cite keys whose surname prefix or year did not
match the entry's actual paper, plus 4 DOI duplicates and 3 corrupted
orphan entries. Renames preserve the cited paper; only the key changes.
Renames (key -> first-author-surname-year-shortform):
- vol2: agarwal2022 -> ouyang2022instructgpt; alistarh2024 ->
ashkboos2024quarot; belkada2022 -> dettmers2022llmint8; borgeaud2022 ->
hoffmann2022chinchilla; bosma2022 -> wei2022cot; ermon2023 ->
rafailov2023dpo; koyejo2023 -> schaeffer2023mirage; nofal2023 ->
beyer2016sre (year/publisher also corrected to O'Reilly 2016).
- vol1: mccarthy2006 -> mccarthy1955dartmouth; krizhevsky2017 ->
krizhevsky2012imagenet; zhang2021 -> zhang2017rethinking; ford2012 ->
savage2009flaw; wonyoung_kim2008 -> kim2008dvfs; estrada2026 ->
dehghani2022datamesh; michelucci2018 -> glorot2010xavier (entry was
Michelucci textbook chapter, prose wanted Glorot/Bengio AISTATS 2010);
chapelle2009 -> chapelle2006semisupervised (entry was 1-page IEEE
review, prose wanted the actual MIT Press book).
- interviews: key555befcd -> gierl2013automatic; chiang2023 ->
zheng2023judging; boylan1989 -> tay2024interview (Grind 75 web
resource); stenbeck1992 -> hambleton1991 (entry was 1992 review of the
1991 IRT book, content was the book).
DOI dedup:
- vol1 palmer1980 + palmer1980intel8087 -> palmer1980intel8087 (same
paper, redirected cite, deleted dupe).
- vol2 masanet2020 + masanet2020energy -> masanet2020energy (same paper,
redirected cite, deleted dupe).
- vol1 abadi2016tensorflow had wrong DOI pointing to the 2018 EuroSys
Dynamic Control Flow paper; rebuilt as the OSDI 2016 TensorFlow paper
it claims to be. Mirrored same correction into vol2's duplicate entry.
Orphan deletions (zero cite sites, corrupted metadata):
- vol1 acun2023; vol1 aggarwal2018; interviews gallifant2024 (the clean
GPT-4 entry already exists at openai2023gpt4).
- vol1 yu2018 (legitimate paper but unused).
- vol2 mckinsey2018ai and triton.jit (orphans flagged for missing year;
triton.jit was a false positive from a Python decorator inside a code
block, not a citation).
Field repairs:
- aws2020s3: added year=2020, fixed corrupted author "A. W. Services"
to {Amazon Web Services}, added howpublished + url.
51 cite-site updates across 25 files in vol1/vol2/interviews/mlsysim.
All book-prose.md §5 cite-mechanics audit greps return zero hits.
bib_lint reports 0 errors across all three modified bibs.
858 lines
32 KiB
BibTeX
@book{aera2014standards,
  author        = {{American Educational Research Association} and
                   {American Psychological Association} and
                   {National Council on Measurement in Education}},
  title         = {Standards for Educational and Psychological Testing},
  publisher     = {American Educational Research Association},
  year          = {2014},
  note          = {Gold standard for assessment validation: content, construct, consequential validity},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@article{agrawal2025,
  author        = {Agrawal, Amey and Kedia, Nitin and Panwar, Ashish and Mohan, Jayashree and
                   Kwatra, Nipun and Gulavani, Bhargav S. and Tumanov, Alexey and
                   Ramjee, Ramachandran},
  title         = {Efficient {LLM} Inference via Chunked Prefills},
  journal       = {ACM SIGOPS Operating Systems Review},
  year          = {2025},
  volume        = {59},
  number        = {1},
  pages         = {9--16},
  publisher     = {Association for Computing Machinery (ACM)},
  doi           = {10.1145/3759441.3759444},
  issn          = {0163-5980},
  url           = {https://doi.org/10.1145/3759441.3759444},
  note          = {Sarathi-Serve: chunked prefill with priority scheduling to bound TTFT under continuous batching},
  source        = {Crossref},
  x-verified    = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}
|
|
|
|
@inproceedings{ainslie2023,
  author            = {Ainslie, Joshua and Lee-Thorp, James and de Jong, Michiel and
                       Zemlyanskiy, Yury and Lebr{\'o}n, Federico and Sanghai, Sumit},
  title             = {{GQA}: Training Generalized Multi-Query Transformer Models From Multi-Head Checkpoints},
  booktitle         = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  year              = {2023},
  pages             = {4895--4901},
  publisher         = {Association for Computational Linguistics},
  doi               = {10.18653/v1/2023.emnlp-main.298},
  url               = {https://doi.org/10.18653/v1/2023.emnlp-main.298},
  note              = {Grouped-query attention: KV-head reduction reduces KV-cache memory at modest quality cost},
  source            = {Crossref},
  x-verified        = {2026-05-04},
  x-verified-by     = {openai-MODEL},
  x-verified-status = {verified},
  x-verified-source = {https://aclanthology.org/2023.emnlp-main.298/},
}
|
|
|
|
@techreport{amd2023mi300x,
  author        = {{AMD}},
  title         = {{AMD Instinct MI300X} Accelerator Datasheet},
  institution   = {Advanced Micro Devices},
  year          = {2023},
  url           = {https://www.amd.com/en/products/accelerators/instinct/mi300/mi300x.html},
  note          = {MI300X: 192 GB HBM3, 5.3 TB/s memory bandwidth, 1.3 PFLOPS FP16 dense},
  x-verified    = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}
|
|
|
|
@book{anderson2001taxonomy,
  author        = {Anderson, Lorin W. and Krathwohl, David R. and Airasian, Peter W. and
                   Cruikshank, Kathleen A. and Mayer, Richard E. and Pintrich, Paul R. and
                   Raths, James and Wittrock, Merlin C.},
  title         = {A Taxonomy for Learning, Teaching, and Assessing: A Revision of {Bloom's}
                   Taxonomy of Educational Objectives},
  publisher     = {Longman},
  year          = {2001},
  note          = {The revised Bloom's taxonomy: Remember, Understand, Apply, Analyze, Evaluate, Create},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@misc{anking2024,
  author            = {{AnKing Team}},
  title             = {{AnKing} Step Deck --- Community Quality Control},
  year              = {2024},
  url               = {https://www.ankingmed.com},
  note              = {Large-scale Anki deck with community-driven dedup and quality flags},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://www.ankingmed.com},
}
|
|
|
|
@techreport{arm_cortex_m4,
  author        = {{Arm Limited}},
  title         = {{Arm Cortex-M4} Technical Reference Manual},
  institution   = {Arm Limited},
  year          = {2020},
  url           = {https://developer.arm.com/documentation/100166/0001},
  note          = {Cortex-M4: 64 MHz typical, 256 KB SRAM, no MMU, FPU optional, hard real-time},
  x-verified    = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}
|
|
|
|
@book{biggs1982solo,
  author        = {Biggs, John B. and Collis, Kevin F.},
  title         = {Evaluating the Quality of Learning: The {SOLO} Taxonomy (Structure of the
                   Observed Learning Outcome)},
  publisher     = {Academic Press},
  year          = {1982},
  note          = {SOLO taxonomy: prestructural, unistructural, multistructural, relational, extended abstract},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@book{bloom1956taxonomy,
  author        = {Bloom, Benjamin S. and Engelhart, Max D. and Furst, Edward J. and
                   Hill, Walker H. and Krathwohl, David R.},
  title         = {Taxonomy of Educational Objectives: The Classification of Educational Goals},
  publisher     = {David McKay Company},
  year          = {1956},
  note          = {Original Bloom's taxonomy. Revised by Anderson \& Krathwohl (2001)},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@inproceedings{broder_e738fd4b,
  author            = {Broder, Andrei Z.},
  title             = {On the Resemblance and Containment of Documents},
  booktitle         = {Proceedings of Compression and Complexity of {SEQUENCES} 1997},
  year              = {1997},
  pages             = {21--29},
  publisher         = {IEEE Computer Society},
  doi               = {10.1109/sequen.1997.666900},
  url               = {https://doi.org/10.1109/sequen.1997.666900},
  note              = {MinHash for near-duplicate detection at web scale},
  source            = {Crossref},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1109/SEQUEN.1997.666900},
}
|
|
|
|
@book{case2002constructing,
  author        = {Case, Susan M. and Swanson, David B.},
  title         = {Constructing Written Test Questions for the Basic and Clinical Sciences},
  edition       = {3rd},
  publisher     = {National Board of Medical Examiners},
  year          = {2002},
  note          = {USMLE test blueprinting with 4+ independent classification axes per item},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@article{chen2021humaneval,
  author        = {Chen, Mark and Tworek, Jerry and Jun, Heewoo and Yuan, Qiming and
                   de Oliveira Pinto, Henrique and Kaplan, Jared and Edwards, Harri and
                   Burda, Yuri and Joseph, Nicholas and Brockman, Greg and others},
  title         = {Evaluating Large Language Models Trained on Code},
  journal       = {arXiv preprint arXiv:2107.03374},
  year          = {2021},
  url           = {https://arxiv.org/abs/2107.03374},
  note          = {HumanEval: coding benchmark analogous to StaffML's napkin-math assessment},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@misc{cs249r,
  author            = {Reddi, Vijay Janapa},
  title             = {{CS249r}: Tiny Machine Learning --- Harvard University},
  year              = {2024},
  url               = {https://sites.google.com/g.harvard.edu/cs249-tinyml-2024},
  note              = {Graduate course on ML systems spanning cloud to embedded deployment},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://sites.google.com/g.harvard.edu/cs249-tinyml-2024},
}
|
|
|
|
@inproceedings{dao2022,
  author            = {Dao, Tri and Fu, Daniel Y. and Ermon, Stefano and Rudra, Atri and
                       R{\'e}, Christopher},
  title             = {{FlashAttention}: Fast and Memory-Efficient Exact Attention With {IO}-Awareness},
  booktitle         = {Advances in Neural Information Processing Systems (NeurIPS)},
  year              = {2022},
  volume            = {35},
  pages             = {16344--16359},
  publisher         = {Neural Information Processing Systems Foundation, Inc. (NeurIPS)},
  doi               = {10.52202/068431-1189},
  url               = {https://doi.org/10.52202/068431-1189},
  note              = {IO-aware attention algorithm that reduces memory reads/writes from quadratic to linear in sequence length},
  source            = {Crossref},
  x-verified        = {2026-05-03},
  x-verified-by     = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {https://papers.nips.cc/paper\_files/paper/2022/hash/67d57c32e20fd0a7a302cb81d36e40d5-Abstract-Conference.html},
}
|
|
|
|
@inproceedings{frantar2023gptq,
  author        = {Frantar, Elias and Ashkboos, Saleh and Hoefler, Torsten and Alistarh, Dan},
  title         = {{GPTQ}: Accurate Post-Training Quantization for Generative Pre-trained Transformers},
  booktitle     = {International Conference on Learning Representations (ICLR)},
  publisher     = {OpenReview.net},
  year          = {2023},
  url           = {https://arxiv.org/abs/2210.17323},
  note          = {GPTQ: layer-by-layer one-shot post-training INT4 quantization for LLMs},
  x-verified    = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}
|
|
|
|
@book{gierl2013automatic,
  editor            = {Gierl, Mark J. and Haladyna, Thomas M.},
  title             = {Automatic Item Generation: Theory and Practice},
  publisher         = {Routledge},
  year              = {2013},
  doi               = {10.4324/9780203803912},
  isbn              = {9781136636899},
  url               = {https://doi.org/10.4324/9780203803912},
  note              = {Foundational text on template-based assessment item generation},
  source            = {Crossref},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.4324/9780203803912},
}
|
|
|
|
@article{gu2023mamba,
  author        = {Gu, Albert and Dao, Tri},
  title         = {{Mamba}: Linear-Time Sequence Modeling with Selective State Spaces},
  journal       = {arXiv preprint arXiv:2312.00752},
  year          = {2023},
  url           = {https://arxiv.org/abs/2312.00752},
  note          = {Selective state-space models: linear-time scaling, alternative to attention for long sequences},
  x-verified    = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}
|
|
|
|
@book{hambleton1991,
  author    = {Hambleton, Ronald K. and Swaminathan, Hariharan and Rogers, H. Jane},
  title     = {Fundamentals of Item Response Theory},
  publisher = {SAGE Publications},
  year      = {1991},
  isbn      = {9780803936478},
  note      = {Practical guide to IRT. 30+ responses needed for stable calibration},
}
|
|
|
|
@inproceedings{hendrycks2021mmlu,
  author            = {Hendrycks, Dan and Burns, Collin and Basart, Steven and Zou, Andy and
                       Mazeika, Mantas and Song, Dawn and Steinhardt, Jacob},
  title             = {Measuring Massive Multitask Language Understanding},
  booktitle         = {International Conference on Learning Representations (ICLR)},
  publisher         = {OpenReview.net},
  year              = {2021},
  url               = {https://arxiv.org/abs/2009.03300},
  note              = {MMLU: 57-subject benchmark for evaluating domain coverage and difficulty calibration},
  x-verified        = {2026-04-26},
  x-verified-by     = {bib-web-verify},
  x-verified-source = {https://openreview.net/forum?id=d7KBjmI3GmQ},
}
|
|
|
|
@article{hjorland2013,
  author            = {Hj{\o}rland, Birger},
  title             = {Facet Analysis: The Logical Approach to Knowledge Organization},
  journal           = {Information Processing \& Management},
  volume            = {49},
  number            = {2},
  pages             = {545--557},
  year              = {2013},
  publisher         = {Elsevier BV},
  issn              = {0306-4573},
  doi               = {10.1016/j.ipm.2012.10.001},
  url               = {https://doi.org/10.1016/j.ipm.2012.10.001},
  note              = {Faceted classification: independent orthogonal axes rather than a single hierarchy},
  source            = {Crossref},
  x-verified        = {2026-05-03},
  x-verified-by     = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {https://doi.org/10.1016/j.ipm.2012.10.001},
}
|
|
|
|
@inproceedings{huang2019gpipe,
  author            = {Huang, Yanping and Cheng, Youlong and Bapna, Ankur and Firat, Orhan and
                       Chen, Dehao and Chen, Mia Xu and Lee, HyoukJoong and Ngiam, Jiquan and
                       Le, Quoc V. and Wu, Yonghui and Chen, Zhifeng},
  title             = {{GPipe}: Efficient Training of Giant Neural Networks using Pipeline Parallelism},
  booktitle         = {Advances in Neural Information Processing Systems (NeurIPS)},
  volume            = {32},
  pages             = {103--112},
  publisher         = {Curran Associates Inc.},
  year              = {2019},
  url               = {https://papers.nips.cc/paper/8305-gpipe-efficient-training-of-giant-neural-networks-using-pipeline-parallelism},
  note              = {Pipeline parallelism for training models that exceed single-device memory},
  x-verified        = {2026-05-03},
  x-verified-by     = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {https://papers.nips.cc/paper/8305-gpipe-efficient-training-of-giant-neural-networks-using-pipeline-parallelism},
}
|
|
|
|
@book{huyen2022designing,
  author        = {Huyen, Chip},
  title         = {Designing Machine Learning Systems},
  publisher     = {O'Reilly Media},
  year          = {2022},
  isbn          = {978-1098107963},
  note          = {ML systems design textbook focused on production deployment},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@inproceedings{jimenez2024swebench,
  author            = {Jimenez, Carlos E. and Yang, John and Wettig, Alexander and Yao, Shunyu and
                       Pei, Kexin and Press, Ofir and Narasimhan, Karthik},
  title             = {{SWE-bench}: Can Language Models Resolve Real-World {GitHub} Issues?},
  booktitle         = {International Conference on Learning Representations (ICLR)},
  publisher         = {OpenReview.net},
  year              = {2024},
  url               = {https://openreview.net/forum?id=VTF8yNQM66},
  x-verified        = {2026-04-26},
  x-verified-by     = {bib-web-verify},
  x-verified-source = {https://openreview.net/forum?id=VTF8yNQM66},
}
|
|
|
|
@inproceedings{kwon2023,
  author            = {Kwon, Woosuk and Li, Zhuohan and Zhuang, Siyuan and Sheng, Ying and
                       Zheng, Lianmin and Yu, Cody Hao and Gonzalez, Joseph E. and Zhang, Hao and
                       Stoica, Ion},
  title             = {Efficient Memory Management for Large Language Model Serving With {PagedAttention}},
  booktitle         = {Proceedings of the 29th Symposium on Operating Systems Principles},
  year              = {2023},
  pages             = {611--626},
  publisher         = {ACM},
  doi               = {10.1145/3600006.3613165},
  url               = {https://doi.org/10.1145/3600006.3613165},
  note              = {vLLM: virtual memory paging for KV-cache reduces fragmentation and enables higher throughput},
  source            = {Crossref},
  x-verified        = {2026-05-03},
  x-verified-by     = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {https://dl.acm.org/doi/10.1145/3600006.3613165; https://dblp.org/rec/conf/sosp/KwonLZ0ZY0ZS23.html},
}
|
|
|
|
@misc{leetcode2024,
  author            = {{LeetCode Inc.}},
  title             = {{LeetCode} --- Online Coding Platform},
  year              = {2024},
  url               = {https://leetcode.com},
  note              = {2,869+ validated coding problems with community-driven difficulty calibration},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://leetcode.com},
}
|
|
|
|
@inproceedings{lin2020mcunet,
  author            = {Lin, Ji and Chen, Wei-Ming and Lin, Yujun and Cohn, John and Gan, Chuang and
                       Han, Song},
  title             = {{MCUNet}: Tiny Deep Learning on {IoT} Devices},
  booktitle         = {Advances in Neural Information Processing Systems (NeurIPS)},
  year              = {2020},
  volume            = {33},
  pages             = {11711--11722},
  publisher         = {Curran Associates Inc.},
  url               = {https://papers.nips.cc/paper/2020/hash/86c51678350f656dcc7f490a43946ee5-Abstract.html},
  x-verified        = {2026-05-03},
  x-verified-by     = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {https://papers.nips.cc/paper/2020/hash/86c51678350f656dcc7f490a43946ee5-Abstract.html},
}
|
|
|
|
@article{lin2025,
  author        = {Lin, Ji and Tang, Jiaming and Tang, Haotian and Yang, Shang and
                   Chen, Wei-Ming and Wang, Wei-Chen and Xiao, Guangxuan and Dang, Xingyu and
                   Gan, Chuang and Han, Song},
  title         = {{AWQ}: Activation-Aware Weight Quantization for On-Device {LLM} Compression and Acceleration},
  journal       = {GetMobile: Mobile Computing and Communications},
  year          = {2024},
  volume        = {28},
  number        = {4},
  pages         = {12--17},
  publisher     = {Association for Computing Machinery (ACM)},
  doi           = {10.1145/3714983.3714987},
  issn          = {2375-0529, 2375-0537},
  url           = {https://doi.org/10.1145/3714983.3714987},
  note          = {AWQ: salient-weight-aware INT4 weight-only quantization for LLM serving},
  source        = {Crossref},
  x-verified    = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}
|
|
|
|
@book{lord2012,
  author        = {Lord, Frederic M.},
  title         = {Applications of Item Response Theory to Practical Testing Problems},
  publisher     = {Routledge},
  year          = {1980},
  isbn          = {9781136557248},
  doi           = {10.4324/9780203056615},
  url           = {https://doi.org/10.4324/9780203056615},
  note          = {Foundational IRT text. b-parameter (difficulty), a-parameter (discrimination)},
  source        = {Crossref},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@inproceedings{mattson2020mlperf,
  author            = {Mattson, Peter and Cheng, Christine and Coleman, Cody and Diamos, Greg and
                       Micikevicius, Paulius and Patterson, David and Tang, Hanlin and
                       Wei, Gu-Yeon and Bailis, Peter and Bittorf, Victor and Brooks, David and
                       Chen, Dehao and Dutta, Debojyoti and Gupta, Udit and Hazelwood, Kim and
                       Hock, Andrew and Huang, Xinyuan and Ike, Atsushi and Jia, Bill and
                       Kang, Daniel and Kanter, David and Kumar, Naveen and Liao, Jeffery and
                       Ma, Guokai and Narayanan, Deepak and Oguntebi, Tayo and
                       Pekhimenko, Gennady and Pentecost, Lillian and Reddi, Vijay Janapa and
                       Robie, Taylor and St. John, Tom and Wu, Carole-Jean and Xu, Lingjie and
                       Young, Cliff and Zaharia, Matei},
  title             = {{MLPerf} Training Benchmark},
  booktitle         = {Proceedings of Machine Learning and Systems (MLSys)},
  year              = {2020},
  volume            = {2},
  pages             = {336--349},
  publisher         = {mlsys.org},
  url               = {https://arxiv.org/abs/1910.01500},
  eprint            = {1910.01500},
  archiveprefix     = {arXiv},
  note              = {The original MLPerf Training benchmark establishing standardized ML system measurement},
  x-verified        = {2026-04-26},
  x-verified-by     = {bib-web-verify},
  x-verified-source = {https://proceedings.mlsys.org/paper\_files/paper/2020/hash/411e39b117e885341f25efb8912945f7-Abstract.html},
}
|
|
|
|
@article{messick1995,
  author            = {Messick, Samuel},
  title             = {Validity of Psychological Assessment: Validation of Inferences From Persons'
                       Responses and Performances as Scientific Inquiry Into Score Meaning},
  journal           = {American Psychologist},
  year              = {1995},
  volume            = {50},
  number            = {9},
  pages             = {741--749},
  publisher         = {American Psychological Association (APA)},
  doi               = {10.1037/0003-066x.50.9.741},
  issn              = {1935-990X, 0003-066X},
  url               = {https://doi.org/10.1037/0003-066x.50.9.741},
  note              = {Unified validity framework: content, construct, consequential validity},
  source            = {Crossref},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1037/0003-066X.50.9.741},
}
|
|
|
|
@incollection{millman1989,
  author            = {Millman, Jason and Greene, Jennifer},
  title             = {The Specification and Development of Tests of Achievement and Ability},
  booktitle         = {Educational Measurement},
  editor            = {Linn, Robert L.},
  edition           = {3rd},
  pages             = {335--366},
  publisher         = {American Council on Education / Macmillan},
  year              = {1989},
  note              = {Test blueprinting and tables of specifications; standard reference for coverage validation},
  x-verified        = {2026-04-26},
  x-verified-by     = {bib-web-verify},
  x-verified-source = {https://eric.ed.gov/?id=ED372105},
}
|
|
|
|
@article{mislevy2003,
  author            = {Mislevy, Robert J. and Almond, Russell G. and Lukas, Janice F.},
  title             = {A Brief Introduction to Evidence-Centered Design},
  journal           = {ETS Research Report Series},
  year              = {2003},
  volume            = {2003},
  number            = {1},
  publisher         = {Wiley},
  institution       = {Educational Testing Service},
  type              = {ETS Research Report},
  doi               = {10.1002/j.2333-8504.2003.tb01908.x},
  issn              = {2330-8516},
  url               = {https://doi.org/10.1002/j.2333-8504.2003.tb01908.x},
  note              = {ECD framework: claims, evidence, task features for assessment design},
  source            = {Crossref},
  x-verified        = {2026-04-26},
  x-verified-by     = {bib-web-verify},
  x-verified-source = {https://www.ets.org/research/policy\_research\_reports/publications/report/2003/hsgs.html},
}
|
|
|
|
@misc{neetcode2024,
  author            = {{NeetCode}},
  title             = {{NeetCode} 150 --- Curated Coding Interview Problems},
  year              = {2024},
  url               = {https://neetcode.io/practice},
  note              = {150 problems organized by pattern with difficulty progression},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://neetcode.io/practice},
}
|
|
|
|
@techreport{nvidia2022h100,
  author        = {{NVIDIA Corporation}},
  title         = {{NVIDIA H100} Tensor Core {GPU} Architecture},
  institution   = {NVIDIA Corporation},
  year          = {2022},
  url           = {https://resources.nvidia.com/en-us-tensor-core/gtc22-whitepaper-hopper},
  note          = {H100 datasheet: 80 GB HBM3, 3.35 TB/s, 989 TFLOPS FP16 dense, 494 TFLOPS TF32 dense},
  x-verified    = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}
|
|
|
|
@techreport{nvidia2022orin,
  author        = {{NVIDIA Corporation}},
  title         = {{NVIDIA Jetson AGX Orin} Series Technical Brief},
  institution   = {NVIDIA Corporation},
  year          = {2022},
  url           = {https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/},
  note          = {Orin AGX: 275 TOPS INT8 sparse, 60 W power envelope, 32 GB LPDDR5},
  x-verified    = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}
|
|
|
|
@techreport{openai2023gpt4,
  author        = {{OpenAI}},
  title         = {{GPT-4} Technical Report},
  institution   = {OpenAI},
  year          = {2023},
  eprint        = {2303.08774},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@techreport{qti2020,
  author            = {{1EdTech Consortium}},
  title             = {{QTI} --- Question and Test Interoperability},
  institution       = {1EdTech (formerly IMS Global)},
  year              = {2020},
  url               = {https://www.1edtech.org/standards/qti},
  note              = {XML-based standard for assessment item portability across platforms},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://www.1edtech.org/standards/qti},
}
|
|
|
|
@inproceedings{rajbhandari2020,
  author            = {Rajbhandari, Samyam and Rasley, Jeff and Ruwase, Olatunji and He, Yuxiong},
  title             = {{ZeRO}: Memory Optimizations Toward Training Trillion Parameter Models},
  booktitle         = {SC20: International Conference for High Performance Computing, Networking, Storage and Analysis},
  year              = {2020},
  pages             = {1--16},
  publisher         = {IEEE},
  doi               = {10.1109/sc41405.2020.00024},
  url               = {https://doi.org/10.1109/sc41405.2020.00024},
  note              = {ZeRO optimizer partitioning eliminates memory redundancy in data parallelism},
  source            = {Crossref},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1109/SC41405.2020.00024},
}
|
|
|
|
@inproceedings{rasley2020,
  author            = {Rasley, Jeff and Rajbhandari, Samyam and Ruwase, Olatunji and He, Yuxiong},
  title             = {{DeepSpeed}: System Optimizations Enable Training Deep Learning Models With
                       Over 100 Billion Parameters},
  booktitle         = {Proceedings of the 26th ACM SIGKDD International Conference on Knowledge
                       Discovery \& Data Mining},
  year              = {2020},
  pages             = {3505--3506},
  publisher         = {ACM},
  doi               = {10.1145/3394486.3406703},
  url               = {https://doi.org/10.1145/3394486.3406703},
  note              = {Training system combining ZeRO, pipeline parallelism, and mixed precision for 100B+ parameter models},
  source            = {Crossref},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1145/3394486.3406703},
}
|
|
|
|
@inproceedings{reddi2020,
  author            = {Reddi, Vijay Janapa and Cheng, Christine and Kanter, David and
                       Mattson, Peter and Schmuelling, Guenther and Wu, Carole-Jean and
                       Anderson, Brian and Breughe, Maximilien and Charlebois, Mark and
                       Chou, William and Chukka, Ramesh and Coleman, Cody and Davis, Sam and
                       Deng, Pan and Diamos, Greg and Duke, Jared and Fick, Dave and
                       Gardner, J. Scott and Hubara, Itay and Idgunji, Sachin and
                       Jablin, Thomas B. and Jiao, Jeff and St. John, Tom and Kanwar, Pankaj and
                       Lee, David and Liao, Jeffery and Lokhmotov, Anton and Massa, Francisco and
                       Meng, Peng and Micikevicius, Paulius and Osborne, Colin and
                       Pekhimenko, Gennady and Rajan, Arun Tejusve Raghunath and
                       Sequeira, Dilip and Sirasao, Ashish and Sun, Fei and Tang, Hanlin and
                       Thomson, Michael and Wei, Frank and Wu, Ephrem and Xu, Lingjie and
                       Yamada, Koichi and Yu, Bing and Yuan, George and Zhong, Aaron and
                       Zhang, Peizhao and Zhou, Yuchen},
  title             = {{MLPerf} Inference Benchmark},
  booktitle         = {2020 ACM/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)},
  year              = {2020},
  pages             = {446--459},
  publisher         = {IEEE},
  doi               = {10.1109/isca45697.2020.00045},
  url               = {https://doi.org/10.1109/isca45697.2020.00045},
  note              = {Standardized benchmarks for ML system performance across inference scenarios},
  source            = {Crossref},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1109/ISCA45697.2020.00045},
}
|
|
|
|
@book{reddi2026a,
  author            = {Reddi, Vijay Janapa},
  title             = {Machine Learning Systems at Scale},
  publisher         = {MIT Press},
  year              = {2026},
  url               = {https://mlsysbook.ai},
  note              = {Volume II of the Machine Learning Systems textbook},
  x-verified        = {2026-04-26},
  x-verified-by     = {bib-web-verify},
  x-verified-source = {https://mlsysbook.ai/},
}
|
|
|
|
@book{reddi2026mlsys,
  author            = {Reddi, Vijay Janapa},
  title             = {Machine Learning Systems},
  publisher         = {MIT Press},
  year              = {2026},
  url               = {https://mlsysbook.ai},
  note              = {Two-volume textbook on ML systems following the Hennessy \& Patterson pedagogical model. Volume~I covers single-machine systems; Volume~II covers distributed systems at scale.},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://mlsysbook.ai},
}
|
|
|
|
@software{reddi2026mlsysim,
  author            = {Reddi, Vijay Janapa},
  title             = {{MLSys$\cdot$im}: First-Principles Infrastructure Modeling for Machine Learning Systems},
  year              = {2026},
  url               = {https://mlsysbook.ai/mlsysim},
  note              = {Companion analytical modeling framework for the ML Systems textbook. Provides shared hardware constants, roofline analysis, and quantitative verification of systems reasoning},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://mlsysbook.ai/mlsysim},
}
|
|
|
|
@article{reddi2026tinytorch,
  author        = {Reddi, Vijay Janapa},
  title         = {{TinyTorch}: Building Machine Learning Systems from First Principles},
  journal       = {arXiv preprint arXiv:2601.19107},
  year          = {2026},
  url           = {https://arxiv.org/abs/2601.19107},
  note          = {Educational ML framework with 20 modules teaching ML as systems engineering from first principles},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@inproceedings{reimers2019,
  author        = {Reimers, Nils and Gurevych, Iryna},
  title         = {{Sentence-BERT}: Sentence Embeddings Using {Siamese} {BERT}-Networks},
  booktitle     = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language
                   Processing and the 9th International Joint Conference on Natural Language
                   Processing (EMNLP-IJCNLP)},
  year          = {2019},
  pages         = {3980--3990},
  publisher     = {Association for Computational Linguistics},
  doi           = {10.18653/v1/d19-1410},
  url           = {https://doi.org/10.18653/v1/d19-1410},
  note          = {Sentence embeddings used for semantic deduplication of question scenarios},
  source        = {Crossref},
  x-verified    = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}
|
|
|
|
@article{shoeybi2019megatron,
  author        = {Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and
                   LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan},
  title         = {Megatron-{LM}: Training Multi-Billion Parameter Language Models Using Model Parallelism},
  journal       = {arXiv preprint arXiv:1909.08053},
  year          = {2019},
  url           = {https://arxiv.org/abs/1909.08053},
  note          = {Tensor parallelism for training large language models across GPUs},
  x-verified    = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@techreport{skos2009,
  author            = {{W3C}},
  title             = {{SKOS} Simple Knowledge Organization System Reference},
  type              = {W3C Recommendation},
  institution       = {World Wide Web Consortium},
  year              = {2009},
  url               = {https://www.w3.org/TR/skos-reference/},
  note              = {Standard for broader/narrower/related concept relationships},
  x-verified        = {2026-04-09},
  x-verified-by     = {pass-17-bib-hygiene},
  x-verified-source = {https://www.w3.org/TR/skos-reference/},
}
|
|
|
|
@book{soergel1985organizing,
  title = {Organizing Information: Principles of Data Base and Retrieval Systems},
  author = {Soergel, Dagobert},
  year = {1985},
  publisher = {Academic Press},
  note = {User warrant vs literary warrant in knowledge organization},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@misc{tay2024interview,
  title = {Grind 75: Interview Problem List},
  author = {Tay, Yangshun},
  year = {2024},
  url = {https://www.techinterviewhandbook.org/grind75},
  note = {169 problems ranked by frequency, pattern coverage, and difficulty},
  howpublished = {Tech Interview Handbook},
}
|
|
|
|
@misc{thompson2011sympathy,
  title = {Mechanical Sympathy},
  author = {Thompson, Martin},
  year = {2011},
  url = {https://mechanical-sympathy.blogspot.com/},
  note = {
    Blog. The term describes software that works with the hardware rather than against it, inspired
    by racing driver Jackie Stewart's philosophy
  },
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://mechanical-sympathy.blogspot.com/},
}
|
|
|
|
@techreport{webb1997dok,
  title = {Criteria for Alignment of Expectations and Assessments in Mathematics and Science Education},
  author = {Webb, Norman L.},
  year = {1997},
  url = {https://eric.ed.gov/?id=ED414305},
  note = {
    Depth of Knowledge (DOK) framework: four levels of cognitive complexity for assessment
    alignment
  },
  institution = {Council of Chief State School Officers},
  type = {Research Monograph},
  number = {6},
  x-verified = {2026-05-04},
  x-verified-by = {openai-MODEL},
  x-verified-status = {verified},
  x-verified-source = {https://eric.ed.gov/?id=ED414305},
}
|
|
|
|
@book{wiggins2005understanding,
  title = {Understanding by Design},
  author = {Wiggins, Grant and McTighe, Jay},
  year = {2005},
  publisher = {Association for Supervision and Curriculum Development},
  note = {Backward design methodology: desired results, acceptable evidence, then learning plan},
  edition = {2nd},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@article{williams2009,
  title = {Roofline: An Insightful Visual Performance Model for Multicore Architectures},
  author = {Williams, Samuel and Waterman, Andrew and Patterson, David},
  year = {2009},
  journal = {Communications of the ACM},
  publisher = {Association for Computing Machinery (ACM)},
  volume = {52},
  number = {4},
  pages = {65--76},
  doi = {10.1145/1498765.1498785},
  issn = {0001-0782, 1557-7317},
  url = {https://doi.org/10.1145/1498765.1498785},
  note = {
    Canonical roofline model: arithmetic intensity, ridge point, compute- vs memory-bound
    classification
  },
  source = {Crossref},
  x-verified = {2026-05-03},
  x-verified-by = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {https://doi.org/10.1145/1498765.1498785},
}
|
|
|
|
@inproceedings{yu2022orca,
  title = {{Orca}: A Distributed Serving System for Transformer-Based Generative Models},
  author = {Yu, Gyeong-In and Jeong, Joo Seong and Kim, Geon-Woo and Kim, Soojeong and Chun, Byung-Gon},
  year = {2022},
  booktitle = {
    Proceedings of the 16th USENIX Symposium on Operating Systems Design and Implementation (OSDI)
  },
  publisher = {USENIX Association},
  pages = {521--538},
  note = {
    Iteration-level scheduling (continuous batching) for transformer serving, eliminating padding
    waste
  },
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}
|
|
|
|
@inproceedings{zheng2023judging,
  title = {Judging {LLM}-as-a-Judge with {MT-Bench} and {Chatbot Arena}},
  author = {
    Zheng, Lianmin and Chiang, Wei-Lin and Sheng, Ying and Zhuang, Siyuan and Wu, Zhanghao and
    Zhuang, Yonghao and Lin, Zi and Li, Zhuohan and Li, Dacheng and Xing, Eric P. and Zhang, Hao
    and Gonzalez, Joseph E. and Stoica, Ion
  },
  year = {2023},
  booktitle = {Advances in Neural Information Processing Systems 36},
  publisher = {Neural Information Processing Systems Foundation, Inc. (NeurIPS)},
  pages = {46595--46623},
  doi = {10.52202/075280-2020},
  url = {https://doi.org/10.52202/075280-2020},
  note = {NeurIPS 2023 Datasets and Benchmarks track},
  source = {Crossref},
  x-verified = {2026-05-03},
  x-verified-by = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
}
|