cs249r_book/interviews/paper/references.bib

@book{aera2014standards,
  author = {
    {American Educational Research Association} and
    {American Psychological Association} and
    {National Council on Measurement in Education}
  },
  title = {Standards for Educational and Psychological Testing},
  publisher = {American Educational Research Association},
  year = {2014},
  note = {Gold standard for assessment validation: content, construct, consequential validity},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

% Journal article record: the DOI below belongs to ACM SIGOPS Operating Systems Review 59(1),
% so no conference booktitle is attached to this entry.
% NOTE(review): year is set to 2025 to agree with volume 59 and the citation key; confirm
% against the DOI landing page.
@article{agrawal2025,
  title = {Efficient {LLM} Inference via Chunked Prefills},
  author = {
    Agrawal, Amey and Kedia, Nitin and Panwar, Ashish and Mohan, Jayashree and Kwatra, Nipun and
    Gulavani, Bhargav S. and Tumanov, Alexey and Ramjee, Ramachandran
  },
  year = {2025},
  journal = {ACM SIGOPS Operating Systems Review},
  publisher = {Association for Computing Machinery (ACM)},
  volume = {59},
  number = {1},
  pages = {9--16},
  doi = {10.1145/3759441.3759444},
  issn = {0163-5980},
  url = {https://doi.org/10.1145/3759441.3759444},
  note = {
    Sarathi-Serve: chunked prefill with priority scheduling to bound TTFT under continuous batching
  },
  source = {Crossref},
  x-verified = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}

@inproceedings{ainslie2023,
  title = {{GQA}: Training Generalized Multi-Query Transformer Models From Multi-Head Checkpoints},
  author = {
    Ainslie, Joshua and Lee-Thorp, James and de Jong, Michiel and Zemlyanskiy, Yury and Lebr{\'o}n,
    Federico and Sanghai, Sumit
  },
  year = {2023},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  pages = {4895--4901},
  doi = {10.18653/v1/2023.emnlp-main.298},
  url = {https://doi.org/10.18653/v1/2023.emnlp-main.298},
  note = {Grouped-query attention: KV-head reduction reduces KV-cache memory at modest quality cost},
  source = {Crossref},
  x-verified = {2026-05-04},
  x-verified-by = {openai-MODEL},
  x-verified-status = {verified},
  x-verified-source = {https://aclanthology.org/2023.emnlp-main.298/},
}

@techreport{amd2023mi300x,
  author = {{AMD}},
  title = {{AMD Instinct MI300X} Accelerator Datasheet},
  institution = {Advanced Micro Devices},
  year = {2023},
  url = {https://www.amd.com/en/products/accelerators/instinct/mi300/mi300x.html},
  note = {MI300X: 192 GB HBM3, 5.3 TB/s memory bandwidth, 1.3 PFLOPS FP16 dense},
  x-verified = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}

@book{anderson2001taxonomy,
  author = {
    Anderson, Lorin W. and Krathwohl, David R. and Airasian, Peter W. and
    Cruikshank, Kathleen A. and Mayer, Richard E. and Pintrich, Paul R. and
    Raths, James and Wittrock, Merlin C.
  },
  title = {
    A Taxonomy for Learning, Teaching, and Assessing:
    A Revision of Bloom's Taxonomy of Educational Objectives
  },
  publisher = {Longman},
  year = {2001},
  note = {The revised Bloom's taxonomy: Remember, Understand, Apply, Analyze, Evaluate, Create},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@misc{anking2024,
  author = {{AnKing Team}},
  title = {{AnKing} Step Deck --- Community Quality Control},
  year = {2024},
  url = {https://www.ankingmed.com},
  note = {Large-scale Anki deck with community-driven dedup and quality flags},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://www.ankingmed.com},
}

@techreport{arm_cortex_m4,
  author = {{Arm Limited}},
  title = {{Arm Cortex-M4} Technical Reference Manual},
  institution = {Arm Limited},
  year = {2020},
  url = {https://developer.arm.com/documentation/100166/0001},
  note = {Cortex-M4: 64 MHz typical, 256 KB SRAM, no MMU, FPU optional, hard real-time},
  x-verified = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}

@book{biggs1982solo,
  author = {Biggs, John B. and Collis, Kevin F.},
  title = {
    Evaluating the Quality of Learning: The {SOLO} Taxonomy
    (Structure of the Observed Learning Outcome)
  },
  publisher = {Academic Press},
  year = {1982},
  note = {SOLO taxonomy: prestructural, unistructural, multistructural, relational, extended abstract},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@book{bloom1956taxonomy,
  author = {
    Bloom, Benjamin S. and Engelhart, Max D. and Furst, Edward J. and
    Hill, Walker H. and Krathwohl, David R.
  },
  title = {Taxonomy of Educational Objectives: The Classification of Educational Goals},
  publisher = {David McKay Company},
  year = {1956},
  note = {Original Bloom's taxonomy. Revised by Anderson \& Krathwohl (2001)},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@inproceedings{broder_e738fd4b,
  title = {On the Resemblance and Containment of Documents},
  author = {Broder, Andrei Z.},
  year = {1997},
  booktitle = {Proceedings of Compression and Complexity of {SEQUENCES} 1997},
  publisher = {IEEE Computer Society},
  pages = {21--29},
  doi = {10.1109/sequen.1997.666900},
  url = {https://doi.org/10.1109/sequen.1997.666900},
  note = {MinHash for near-duplicate detection at web scale},
  source = {Crossref},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1109/SEQUEN.1997.666900},
}

@book{case2002constructing,
  author = {Case, Susan M. and Swanson, David B.},
  title = {Constructing Written Test Questions for the Basic and Clinical Sciences},
  edition = {3rd},
  publisher = {National Board of Medical Examiners},
  year = {2002},
  note = {USMLE test blueprinting with 4+ independent classification axes per item},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@article{chen2021humaneval,
  title = {Evaluating Large Language Models Trained on Code},
  author = {
    Chen, Mark and Tworek, Jerry and Jun, Heewoo and Yuan, Qiming and de Oliveira Pinto, Henrique
    and Kaplan, Jared and Edwards, Harri and Burda, Yuri and Joseph, Nicholas and Brockman, Greg
    and others
  },
  year = {2021},
  journal = {arXiv preprint arXiv:2107.03374},
  url = {https://arxiv.org/abs/2107.03374},
  note = {HumanEval: coding benchmark analogous to StaffML's napkin-math assessment},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@misc{cs249r,
  title = {{CS249r}: Tiny Machine Learning --- Harvard University},
  author = {Reddi, Vijay Janapa},
  year = {2024},
  url = {https://sites.google.com/g.harvard.edu/cs249-tinyml-2024},
  note = {Graduate course on ML systems spanning cloud to embedded deployment},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://sites.google.com/g.harvard.edu/cs249-tinyml-2024},
}

@inproceedings{dao2022,
  title = {{FlashAttention}: Fast and Memory-Efficient Exact Attention with {IO}-Awareness},
  author = {Dao, Tri and Fu, Daniel Y. and Ermon, Stefano and Rudra, Atri and R{\'e}, Christopher},
  year = {2022},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  publisher = {Neural Information Processing Systems Foundation, Inc. (NeurIPS)},
  volume = {35},
  pages = {16344--16359},
  doi = {10.52202/068431-1189},
  url = {https://doi.org/10.52202/068431-1189},
  note = {
    IO-aware attention algorithm that reduces memory reads/writes from quadratic to linear in
    sequence length
  },
  source = {Crossref},
  x-verified = {2026-05-03},
  x-verified-by = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {
    https://papers.nips.cc/paper\_files/paper/2022/hash/67d57c32e20fd0a7a302cb81d36e40d5-Abstract-Conference.html
  },
}

@inproceedings{frantar2023gptq,
  author = {Frantar, Elias and Ashkboos, Saleh and Hoefler, Torsten and Alistarh, Dan},
  title = {{GPTQ}: Accurate Post-Training Quantization for Generative Pre-trained Transformers},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year = {2023},
  publisher = {OpenReview.net},
  url = {https://arxiv.org/abs/2210.17323},
  note = {GPTQ: layer-by-layer one-shot post-training INT4 quantization for LLMs},
  x-verified = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}

@book{gierl2013automatic,
  author = {Gierl, Mark J. and Haladyna, Thomas M.},
  title = {Automatic Item Generation},
  publisher = {Routledge},
  year = {2013},
  doi = {10.4324/9780203803912},
  isbn = {9781136636899},
  url = {https://doi.org/10.4324/9780203803912},
  note = {Foundational text on template-based assessment item generation},
  source = {Crossref},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.4324/9780203803912},
}

@article{gu2023mamba,
  author = {Gu, Albert and Dao, Tri},
  title = {{Mamba}: Linear-Time Sequence Modeling with Selective State Spaces},
  journal = {arXiv preprint arXiv:2312.00752},
  year = {2023},
  url = {https://arxiv.org/abs/2312.00752},
  note = {
    Selective state-space models: linear-time scaling,
    alternative to attention for long sequences
  },
  x-verified = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}

@book{hambleton1991,
  author = {Hambleton, Ronald K. and Swaminathan, Hariharan and Rogers, H. Jane},
  title = {Fundamentals of Item Response Theory},
  publisher = {SAGE Publications},
  year = {1991},
  isbn = {9780803936478},
  note = {Practical guide to IRT. 30+ responses needed for stable calibration},
}

@inproceedings{hendrycks2021mmlu,
  author = {
    Hendrycks, Dan and Burns, Collin and Basart, Steven and Zou, Andy and
    Mazeika, Mantas and Song, Dawn and Steinhardt, Jacob
  },
  title = {Measuring Massive Multitask Language Understanding},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year = {2021},
  publisher = {OpenReview.net},
  url = {https://arxiv.org/abs/2009.03300},
  note = {MMLU: 57-subject benchmark for evaluating domain coverage and difficulty calibration},
  x-verified = {2026-04-26},
  x-verified-by = {bib-web-verify},
  x-verified-source = {https://openreview.net/forum?id=d7KBjmI3GmQ},
}

@article{hjorland2013,
  title = {Facet Analysis: The Logical Approach to Knowledge Organization},
  author = {Hj{\o}rland, Birger},
  year = {2013},
  journal = {Information Processing \& Management},
  publisher = {Elsevier BV},
  volume = {49},
  number = {2},
  pages = {545--557},
  doi = {10.1016/j.ipm.2012.10.001},
  issn = {0306-4573},
  url = {https://doi.org/10.1016/j.ipm.2012.10.001},
  note = {Faceted classification: independent orthogonal axes rather than a single hierarchy},
  source = {Crossref},
  x-verified = {2026-05-03},
  x-verified-by = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {https://doi.org/10.1016/j.ipm.2012.10.001},
}

@inproceedings{huang2019gpipe,
  author = {
    Huang, Yanping and Cheng, Youlong and Bapna, Ankur and Firat, Orhan and
    Chen, Dehao and Chen, Mia Xu and Lee, HyoukJoong and Ngiam, Jiquan and
    Le, Quoc V. and Wu, Yonghui and Chen, Zhifeng
  },
  title = {{GPipe}: Efficient Training of Giant Neural Networks using Pipeline Parallelism},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  year = {2019},
  volume = {32},
  pages = {103--112},
  publisher = {Curran Associates Inc.},
  url = {
    https://papers.nips.cc/paper/8305-gpipe-efficient-training-of-giant-neural-networks-using-pipeline-parallelism
  },
  note = {Pipeline parallelism for training models that exceed single-device memory},
  x-verified = {2026-05-03},
  x-verified-by = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {
    https://papers.nips.cc/paper/8305-gpipe-efficient-training-of-giant-neural-networks-using-pipeline-parallelism
  },
}

@book{huyen2022designing,
  author = {Huyen, Chip},
  title = {Designing Machine Learning Systems},
  publisher = {O'Reilly Media},
  year = {2022},
  isbn = {978-1098107963},
  note = {ML systems design textbook focused on production deployment},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@inproceedings{jimenez2024swebench,
  title = {{SWE-bench}: Can Language Models Resolve Real-World {GitHub} Issues?},
  author = {
    Jimenez, Carlos E. and Yang, John and Wettig, Alexander and Yao, Shunyu and Pei, Kexin and
    Press, Ofir and Narasimhan, Karthik
  },
  year = {2024},
  booktitle = {International Conference on Learning Representations (ICLR)},
  publisher = {OpenReview.net},
  url = {https://openreview.net/forum?id=VTF8yNQM66},
  x-verified = {2026-04-26},
  x-verified-by = {bib-web-verify},
  x-verified-source = {https://openreview.net/forum?id=VTF8yNQM66},
}

@inproceedings{kwon2023,
  title = {Efficient Memory Management for Large Language Model Serving with {PagedAttention}},
  author = {
    Kwon, Woosuk and Li, Zhuohan and Zhuang, Siyuan and Sheng, Ying and Zheng, Lianmin and Yu, Cody
    Hao and Gonzalez, Joseph E. and Zhang, Hao and Stoica, Ion
  },
  year = {2023},
  booktitle = {Proceedings of the 29th Symposium on Operating Systems Principles},
  publisher = {ACM},
  pages = {611--626},
  doi = {10.1145/3600006.3613165},
  url = {https://doi.org/10.1145/3600006.3613165},
  note = {vLLM: virtual memory paging for KV-cache reduces fragmentation and enables higher throughput},
  source = {Crossref},
  x-verified = {2026-05-03},
  x-verified-by = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {
    https://dl.acm.org/doi/10.1145/3600006.3613165;
    https://dblp.org/rec/conf/sosp/KwonLZ0ZY0ZS23.html
  },
}

@misc{leetcode2024,
  author = {{LeetCode Inc.}},
  title = {{LeetCode} --- Online Coding Platform},
  year = {2024},
  url = {https://leetcode.com},
  note = {2,869+ validated coding problems with community-driven difficulty calibration},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://leetcode.com},
}

@inproceedings{lin2020mcunet,
  title = {{MCUNet}: Tiny Deep Learning on {IoT} Devices},
  author = {Lin, Ji and Chen, Wei-Ming and Lin, Yujun and Cohn, John and Gan, Chuang and Han, Song},
  year = {2020},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  publisher = {Curran Associates Inc.},
  volume = {33},
  pages = {11711--11722},
  url = {https://papers.nips.cc/paper/2020/hash/86c51678350f656dcc7f490a43946ee5-Abstract.html},
  x-verified = {2026-05-03},
  x-verified-by = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {https://papers.nips.cc/paper/2020/hash/86c51678350f656dcc7f490a43946ee5-Abstract.html},
}

% Journal article record: the DOI, volume, number and pages below all describe the GetMobile
% article, so no conference booktitle is attached to this entry.
@article{lin2025,
  title = {{AWQ}: Activation-Aware Weight Quantization for On-Device {LLM} Compression and Acceleration},
  author = {
    Lin, Ji and Tang, Jiaming and Tang, Haotian and Yang, Shang and Chen, Wei-Ming and Wang,
    Wei-Chen and Xiao, Guangxuan and Dang, Xingyu and Gan, Chuang and Han, Song
  },
  year = {2024},
  journal = {GetMobile: Mobile Computing and Communications},
  publisher = {Association for Computing Machinery (ACM)},
  volume = {28},
  number = {4},
  pages = {12--17},
  doi = {10.1145/3714983.3714987},
  issn = {2375-0529, 2375-0537},
  url = {https://doi.org/10.1145/3714983.3714987},
  note = {AWQ: salient-weight-aware INT4 weight-only quantization for LLM serving},
  source = {Crossref},
  x-verified = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}

@book{lord2012,
  author = {Lord, Frederic M.},
  title = {Applications of Item Response Theory to Practical Testing Problems},
  publisher = {Routledge},
  year = {1980},
  doi = {10.4324/9780203056615},
  isbn = {9781136557248},
  url = {https://doi.org/10.4324/9780203056615},
  note = {Foundational IRT text. b-parameter (difficulty), a-parameter (discrimination)},
  source = {Crossref},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@inproceedings{mattson2020mlperf,
  title = {{MLPerf} Training Benchmark},
  author = {
    Mattson, Peter and Cheng, Christine and Coleman, Cody and Diamos, Greg and Micikevicius,
    Paulius and Patterson, David and Tang, Hanlin and Wei, Gu-Yeon and Bailis, Peter and Bittorf,
    Victor and Brooks, David and Chen, Dehao and Dutta, Debojyoti and Gupta, Udit and Hazelwood,
    Kim and Hock, Andrew and Huang, Xinyuan and Ike, Atsushi and Jia, Bill and Kang, Daniel and
    Kanter, David and Kumar, Naveen and Liao, Jeffery and Ma, Guokai and Narayanan, Deepak and
    Oguntebi, Tayo and Pekhimenko, Gennady and Pentecost, Lillian and Reddi, Vijay Janapa and
    Robie, Taylor and St. John, Tom and Wu, Carole-Jean and Xu, Lingjie and Young, Cliff and
    Zaharia, Matei
  },
  year = {2020},
  booktitle = {Proceedings of Machine Learning and Systems (MLSys)},
  publisher = {mlsys.org},
  volume = {2},
  pages = {336--349},
  url = {https://arxiv.org/abs/1910.01500},
  note = {The original MLPerf Training benchmark establishing standardized ML system measurement},
  eprint = {1910.01500},
  archiveprefix = {arXiv},
  x-verified = {2026-04-26},
  x-verified-by = {bib-web-verify},
  x-verified-source = {
    https://proceedings.mlsys.org/paper\_files/paper/2020/hash/411e39b117e885341f25efb8912945f7-Abstract.html
  },
}

@article{messick1995,
  title = {
    Validity of Psychological Assessment: Validation of Inferences From Persons' Responses and
    Performances as Scientific Inquiry Into Score Meaning
  },
  author = {Messick, Samuel},
  year = {1995},
  journal = {American Psychologist},
  publisher = {American Psychological Association (APA)},
  volume = {50},
  number = {9},
  pages = {741--749},
  doi = {10.1037/0003-066x.50.9.741},
  issn = {1935-990X, 0003-066X},
  url = {https://doi.org/10.1037/0003-066x.50.9.741},
  note = {Unified validity framework: content, construct, consequential validity},
  source = {Crossref},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1037/0003-066X.50.9.741},
}

@incollection{millman1989,
  author = {Millman, Jason and Greene, Jennifer},
  title = {The Specification and Development of Tests of Achievement and Ability},
  editor = {Linn, Robert L.},
  booktitle = {Educational Measurement},
  edition = {3rd},
  publisher = {American Council on Education / Macmillan},
  year = {1989},
  pages = {335--366},
  note = {Test blueprinting and tables of specifications; standard reference for coverage validation},
  x-verified = {2026-04-26},
  x-verified-by = {bib-web-verify},
  x-verified-source = {https://eric.ed.gov/?id=ED372105},
}

@article{mislevy2003,
  title = {A Brief Introduction to Evidence-Centered Design},
  author = {Mislevy, Robert J. and Almond, Russell G. and Lukas, Janice F.},
  year = {2003},
  journal = {ETS Research Report Series},
  publisher = {Wiley},
  volume = {2003},
  number = {1},
  doi = {10.1002/j.2333-8504.2003.tb01908.x},
  issn = {2330-8516},
  url = {https://doi.org/10.1002/j.2333-8504.2003.tb01908.x},
  note = {ECD framework: claims, evidence, task features for assessment design},
  source = {Crossref},
  institution = {Educational Testing Service},
  type = {ETS Research Report},
  x-verified = {2026-04-26},
  x-verified-by = {bib-web-verify},
  x-verified-source = {https://www.ets.org/research/policy\_research\_reports/publications/report/2003/hsgs.html},
}

@misc{neetcode2024,
  author = {{NeetCode}},
  title = {{NeetCode} 150 --- Curated Coding Interview Problems},
  year = {2024},
  url = {https://neetcode.io/practice},
  note = {150 problems organized by pattern with difficulty progression},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://neetcode.io/practice},
}

@techreport{nvidia2022h100,
  author = {{NVIDIA Corporation}},
  title = {{NVIDIA H100} Tensor Core {GPU} Architecture},
  institution = {NVIDIA Corporation},
  year = {2022},
  url = {https://resources.nvidia.com/en-us-tensor-core/gtc22-whitepaper-hopper},
  note = {H100 datasheet: 80 GB HBM3, 3.35 TB/s, 989 TFLOPS FP16 dense, 494 TFLOPS TF32 dense},
  x-verified = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}

@techreport{nvidia2022orin,
  author = {{NVIDIA Corporation}},
  title = {{NVIDIA Jetson AGX Orin} Series Technical Brief},
  institution = {NVIDIA Corporation},
  year = {2022},
  url = {https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/},
  note = {Orin AGX: 275 TOPS INT8 sparse, 60 W power envelope, 32 GB LPDDR5},
  x-verified = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}

@techreport{openai2023gpt4,
  title = {{GPT-4} Technical Report},
  author = {{OpenAI}},
  year = {2023},
  url = {https://arxiv.org/abs/2303.08774},
  institution = {OpenAI},
  eprint = {2303.08774},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@techreport{qti2020,
  author = {{1EdTech Consortium}},
  title = {{QTI} --- Question and Test Interoperability},
  institution = {1EdTech (formerly IMS Global)},
  year = {2020},
  url = {https://www.1edtech.org/standards/qti},
  note = {XML-based standard for assessment item portability across platforms},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://www.1edtech.org/standards/qti},
}

@inproceedings{rajbhandari2020,
  title = {{ZeRO}: Memory Optimizations Toward Training Trillion Parameter Models},
  author = {Rajbhandari, Samyam and Rasley, Jeff and Ruwase, Olatunji and He, Yuxiong},
  year = {2020},
  booktitle = {
    SC20: International Conference for High Performance Computing, Networking, Storage and Analysis
  },
  publisher = {IEEE},
  pages = {1--16},
  doi = {10.1109/sc41405.2020.00024},
  url = {https://doi.org/10.1109/sc41405.2020.00024},
  note = {ZeRO optimizer partitioning eliminates memory redundancy in data parallelism},
  source = {Crossref},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1109/SC41405.2020.00024},
}

@inproceedings{rasley2020,
  title = {
    {DeepSpeed}: System Optimizations Enable Training Deep Learning Models with Over 100 Billion
    Parameters
  },
  author = {Rasley, Jeff and Rajbhandari, Samyam and Ruwase, Olatunji and He, Yuxiong},
  year = {2020},
  booktitle = {
    Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery \& Data
    Mining
  },
  publisher = {ACM},
  pages = {3505--3506},
  doi = {10.1145/3394486.3406703},
  url = {https://doi.org/10.1145/3394486.3406703},
  note = {
    Training system combining ZeRO, pipeline parallelism, and mixed precision for 100B+ parameter
    models
  },
  source = {Crossref},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1145/3394486.3406703},
}

@inproceedings{reddi2020,
  title = {{MLPerf} Inference Benchmark},
  author = {
    Reddi, Vijay Janapa and Cheng, Christine and Kanter, David and Mattson, Peter and Schmuelling,
    Guenther and Wu, Carole-Jean and Anderson, Brian and Breughe, Maximilien and Charlebois, Mark
    and Chou, William and Chukka, Ramesh and Coleman, Cody and Davis, Sam and Deng, Pan and Diamos,
    Greg and Duke, Jared and Fick, Dave and Gardner, J. Scott and Hubara, Itay and Idgunji, Sachin
    and Jablin, Thomas B. and Jiao, Jeff and St. John, Tom and Kanwar, Pankaj and Lee, David and
    Liao, Jeffery and Lokhmotov, Anton and Massa, Francisco and Meng, Peng and Micikevicius,
    Paulius and Osborne, Colin and Pekhimenko, Gennady and Rajan, Arun Tejusve Raghunath and
    Sequeira, Dilip and Sirasao, Ashish and Sun, Fei and Tang, Hanlin and Thomson, Michael and Wei,
    Frank and Wu, Ephrem and Xu, Lingjie and Yamada, Koichi and Yu, Bing and Yuan, George and
    Zhong, Aaron and Zhang, Peizhao and Zhou, Yuchen
  },
  year = {2020},
  booktitle = {2020 ACM/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)},
  publisher = {IEEE},
  pages = {446--459},
  doi = {10.1109/isca45697.2020.00045},
  url = {https://doi.org/10.1109/isca45697.2020.00045},
  note = {Standardized benchmarks for ML system performance across inference scenarios},
  source = {Crossref},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://doi.org/10.1109/ISCA45697.2020.00045},
}

@book{reddi2026a,
  author = {Reddi, Vijay Janapa},
  title = {Machine Learning Systems at Scale},
  publisher = {MIT Press},
  year = {2026},
  url = {https://mlsysbook.ai},
  note = {Volume II of the Machine Learning Systems textbook},
  x-verified = {2026-04-26},
  x-verified-by = {bib-web-verify},
  x-verified-source = {https://mlsysbook.ai/},
}

@book{reddi2026mlsys,
  author = {Reddi, Vijay Janapa},
  title = {Machine Learning Systems},
  publisher = {MIT Press},
  year = {2026},
  url = {https://mlsysbook.ai},
  note = {
    Two-volume textbook on ML systems following the
    Hennessy \& Patterson pedagogical model.
    Volume~I covers single-machine systems;
    Volume~II covers distributed systems at scale.
  },
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://mlsysbook.ai},
}

@software{reddi2026mlsysim,
  author = {Reddi, Vijay Janapa},
  title = {{MLSys$\cdot$im}: First-Principles Infrastructure Modeling for Machine Learning Systems},
  year = {2026},
  url = {https://mlsysbook.ai/mlsysim},
  note = {
    Companion analytical modeling framework for the ML Systems textbook.
    Provides shared hardware constants, roofline analysis,
    and quantitative verification of systems reasoning
  },
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://mlsysbook.ai/mlsysim},
}

@article{reddi2026tinytorch,
  title = {{TinyTorch}: Building Machine Learning Systems from First Principles},
  author = {Reddi, Vijay Janapa},
  year = {2026},
  journal = {arXiv preprint arXiv:2601.19107},
  url = {https://arxiv.org/abs/2601.19107},
  note = {
    Educational ML framework with 20 modules teaching ML as systems engineering from first
    principles
  },
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@inproceedings{reimers2019,
  title = {{Sentence-BERT}: Sentence Embeddings Using Siamese {BERT}-Networks},
  author = {Reimers, Nils and Gurevych, Iryna},
  year = {2019},
  booktitle = {
    Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the
    9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
  },
  publisher = {Association for Computational Linguistics},
  pages = {3980--3990},
  doi = {10.18653/v1/d19-1410},
  url = {https://doi.org/10.18653/v1/d19-1410},
  note = {Sentence embeddings used for semantic deduplication of question scenarios},
  source = {Crossref},
  x-verified = {2026-04-26},
  x-verified-by = {paper-revision-fresh-reader-pass},
}

@article{shoeybi2019megatron,
  title = {Megatron-{LM}: Training Multi-Billion Parameter Language Models Using Model Parallelism},
  author = {
    Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared
    and Catanzaro, Bryan
  },
  year = {2019},
  journal = {arXiv preprint arXiv:1909.08053},
  url = {https://arxiv.org/abs/1909.08053},
  note = {Tensor parallelism for training large language models across GPUs},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@techreport{skos2009,
  author = {{W3C}},
  title = {{SKOS} Simple Knowledge Organization System Reference},
  type = {W3C Recommendation},
  institution = {World Wide Web Consortium},
  year = {2009},
  url = {https://www.w3.org/TR/skos-reference/},
  note = {Standard for broader/narrower/related concept relationships},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://www.w3.org/TR/skos-reference/},
}

@book{soergel1985organizing,
  author = {Soergel, Dagobert},
  title = {Organizing Information: Principles of Data Base and Retrieval Systems},
  publisher = {Academic Press},
  year = {1985},
  note = {User warrant vs literary warrant in knowledge organization},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@misc{tay2024interview,
  author = {Tay, Yangshun},
  title = {Grind 75: Interview Problem List},
  howpublished = {Tech Interview Handbook},
  year = {2024},
  url = {https://www.techinterviewhandbook.org/grind75},
  note = {169 problems ranked by frequency, pattern coverage, and difficulty},
}

@misc{thompson2011sympathy,
  author = {Thompson, Martin},
  title = {Mechanical Sympathy},
  year = {2011},
  url = {https://mechanical-sympathy.blogspot.com/},
  note = {
    Blog. The term describes software that works with the hardware
    rather than against it, inspired by racing driver Jackie Stewart's philosophy
  },
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
  x-verified-source = {https://mechanical-sympathy.blogspot.com/},
}

% Research monograph, so it is recorded as a report (institution/type/number) rather than as a
% journal article; the monograph series number is carried in the number field.
@techreport{webb1997dok,
  title = {Criteria for Alignment of Expectations and Assessments in Mathematics and Science Education},
  author = {Webb, Norman L.},
  year = {1997},
  url = {https://eric.ed.gov/?id=ED414305},
  note = {
    Depth of Knowledge (DOK) framework: four levels of cognitive complexity for assessment
    alignment
  },
  institution = {Council of Chief State School Officers},
  type = {Research Monograph},
  number = {6},
  x-verified = {2026-05-04},
  x-verified-by = {openai-MODEL},
  x-verified-status = {verified},
  x-verified-source = {https://eric.ed.gov/?id=ED414305},
}

@book{wiggins2005understanding,
  author = {Wiggins, Grant and McTighe, Jay},
  title = {Understanding by Design},
  edition = {2nd},
  publisher = {Association for Supervision and Curriculum Development},
  year = {2005},
  note = {Backward design methodology: desired results, acceptable evidence, then learning plan},
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@article{williams2009,
  title = {Roofline: An Insightful Visual Performance Model for Multicore Architectures},
  author = {Williams, Samuel and Waterman, Andrew and Patterson, David},
  year = {2009},
  journal = {Communications of the ACM},
  publisher = {Association for Computing Machinery (ACM)},
  volume = {52},
  number = {4},
  pages = {65--76},
  doi = {10.1145/1498765.1498785},
  issn = {0001-0782, 1557-7317},
  url = {https://doi.org/10.1145/1498765.1498785},
  note = {
    Canonical roofline model: arithmetic intensity, ridge point, compute- vs memory-bound
    classification
  },
  source = {Crossref},
  x-verified = {2026-05-03},
  x-verified-by = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
  x-verified-source = {https://doi.org/10.1145/1498765.1498785},
}

@inproceedings{yu2022orca,
  author = {
    Yu, Gyeong-In and Jeong, Joo Seong and Kim, Geon-Woo and
    Kim, Soojeong and Chun, Byung-Gon
  },
  title = {{Orca}: A Distributed Serving System for Transformer-Based Generative Models},
  booktitle = {
    Proceedings of the 16th USENIX Symposium on
    Operating Systems Design and Implementation (OSDI)
  },
  year = {2022},
  pages = {521--538},
  publisher = {USENIX Association},
  note = {
    Iteration-level scheduling (continuous batching) for transformer serving,
    eliminating padding waste
  },
  x-verified = {2026-04-09},
  x-verified-by = {pass-17-bib-hygiene},
}

@inproceedings{zheng2023judging,
  title = {Judging {LLM}-as-a-Judge with {MT-Bench} and {Chatbot Arena}},
  author = {
    Zheng, Lianmin and Chiang, Wei-Lin and Sheng, Ying and Zhuang, Siyuan and Wu, Zhanghao and
    Zhuang, Yonghao and Lin, Zi and Li, Zhuohan and Li, Dacheng and Xing, Eric P. and Zhang, Hao
    and Gonzalez, Joseph E. and Stoica, Ion
  },
  year = {2023},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  publisher = {Neural Information Processing Systems Foundation, Inc. (NeurIPS)},
  volume = {36},
  pages = {46595--46623},
  doi = {10.52202/075280-2020},
  url = {https://doi.org/10.52202/075280-2020},
  note = {NeurIPS 2023 Datasets and Benchmarks track},
  source = {Crossref},
  x-verified = {2026-05-03},
  x-verified-by = {claude-bib-audit-2026-05},
  x-verified-status = {verified},
}