Files
cs249r_book/mlsysim/docs/references.bib
Vijay Janapa Reddi aed43c5b81 docs: clean up landing page and centralize math foundations
- Elevate 5-Layer Progressive Lowering mental model to architecture.qmd

- Clean up landing page copy to be a punchy one-liner

- Re-render architecture composition diagram as SVG for reliability

- Move math derivations out of tutorials and into math.qmd with citations

- Add DGX Spark to Silicon Zoo
2026-03-07 18:37:06 -05:00

183 lines
7.2 KiB
BibTeX

@article{williams2009roofline,
  author    = {Williams, Samuel and Waterman, Andrew and Patterson, David},
  title     = {{Roofline}: An Insightful Visual Performance Model for Multicore Architectures},
  journal   = {Communications of the ACM},
  volume    = {52},
  number    = {4},
  pages     = {65--76},
  year      = {2009},
  publisher = {ACM},
  doi       = {10.1145/1498765.1498785},
}
@article{mlperf2020,
  author        = {Mattson, Peter and Cheng, Christine and Diamos, Gregory and others},
  title         = {{MLPerf}: An Industry Standard Benchmark Suite for Machine Learning Performance},
  journal       = {IEEE Micro},
  volume        = {40},
  number        = {2},
  pages         = {8--16},
  year          = {2020},
  doi           = {10.1109/MM.2020.2974843},
  internal-note = {NOTE(review): original entry listed the MICRO symposium with DOI 10.1109/MICRO50266.2020.00045, which does not match this title; corrected to the IEEE Micro magazine article -- verify against IEEE Xplore},
}
@inproceedings{rasley2020deepspeed,
  author    = {Rasley, Jeff and Rajbhandari, Samyam and Ruwase, Olatunji and He, Yuxiong},
  title     = {{DeepSpeed}: System Optimizations Enable Training Deep Learning Models with Over 100 Billion Parameters},
  booktitle = {ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year      = {2020},
  doi       = {10.1145/3394486.3406703},
}
@article{shoeybi2019megatron,
  author        = {Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan},
  title         = {{Megatron-LM}: Training Multi-Billion Parameter Language Models Using Model Parallelism},
  journal       = {arXiv preprint arXiv:1909.08053},
  year          = {2019},
  eprint        = {1909.08053},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}
@article{young1974first,
  author  = {Young, John W.},
  title   = {A First Order Approximation to the Optimum Checkpoint Interval},
  journal = {Communications of the ACM},
  volume  = {17},
  number  = {9},
  pages   = {530--531},
  year    = {1974},
  doi     = {10.1145/361147.361115},
}
@article{daly2006higher,
  author  = {Daly, John T.},
  title   = {A Higher Order Estimate of the Optimum Checkpoint Interval for Restart Dumps},
  journal = {Future Generation Computer Systems},
  volume  = {22},
  number  = {3},
  pages   = {303--312},
  year    = {2006},
  doi     = {10.1016/j.future.2004.11.016},
}
@book{mlsysbook2024,
  author    = {Reddi, Vijay Janapa and others},
  title     = {Machine Learning Systems: Principles and Practices of Engineering Artificially Intelligent Systems},
  publisher = {Harvard University},
  year      = {2024},
  url       = {https://mlsysbook.ai},
}
@book{hennessy2019architecture,
  author    = {Hennessy, John L. and Patterson, David A.},
  title     = {Computer Architecture: A Quantitative Approach},
  edition   = {Sixth},
  publisher = {Morgan Kaufmann},
  year      = {2019},
  isbn      = {978-0128119051},
}
@inproceedings{jouppi2017datacenter,
  author    = {Jouppi, Norman P. and Young, Cliff and Patil, Nishant and others},
  title     = {In-Datacenter Performance Analysis of a Tensor Processing Unit},
  booktitle = {Proceedings of the 44th Annual International Symposium on Computer Architecture (ISCA)},
  pages     = {1--12},
  year      = {2017},
  doi       = {10.1145/3079856.3080246},
}
@article{dean2012large,
  author  = {Dean, Jeffrey and Corrado, Greg S. and Monga, Rajat and others},
  title   = {Large Scale Distributed Deep Networks},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {25},
  year    = {2012},
}
@misc{amodei2018ai,
  author       = {Amodei, Dario and Hernandez, Danny},
  title        = {{AI} and Compute},
  howpublished = {OpenAI Blog},
  year         = {2018},
  url          = {https://openai.com/blog/ai-and-compute},
}
@article{patterson2022carbon,
  author        = {Patterson, David and Gonzalez, Joseph and Le, Quoc and others},
  title         = {Carbon Emissions and Large Neural Network Training},
  journal       = {arXiv preprint arXiv:2104.10350},
  year          = {2021},
  eprint        = {2104.10350},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  internal-note = {NOTE(review): arXiv:2104.10350 was submitted April 2021; year corrected from 2022 (citation key left unchanged so existing cites keep working)},
}
@inproceedings{rajbhandari2020zero,
  author    = {Rajbhandari, Samyam and Rasley, Jeff and Ruwase, Olatunji and He, Yuxiong},
  title     = {{ZeRO}: Memory Optimizations Toward Training Trillion Parameter Models},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC)},
  year      = {2020},
  doi       = {10.1109/SC41405.2020.00024},
}
@article{kaplan2020scaling,
  author        = {Kaplan, Jared and McCandlish, Sam and Henighan, Tom and others},
  title         = {Scaling Laws for Neural Language Models},
  journal       = {arXiv preprint arXiv:2001.08361},
  year          = {2020},
  eprint        = {2001.08361},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
}
@inproceedings{kwon2023efficient,
  author    = {Kwon, Woosuk and Li, Zhuohan and Zhuang, Siyuan and others},
  title     = {Efficient Memory Management for Large Language Model Serving with {PagedAttention}},
  booktitle = {Proceedings of the 29th ACM Symposium on Operating Systems Principles (SOSP)},
  year      = {2023},
  doi       = {10.1145/3600006.3613165},
}
@misc{nvidia2023h100,
  author = {{NVIDIA Corporation}},
  title  = {{NVIDIA H100 Tensor Core GPU} Datasheet},
  year   = {2023},
  url    = {https://www.nvidia.com/en-us/data-center/h100/},
  note   = {Accessed: 2024-06-15},
}
@inproceedings{won2023astrasim2,
  author    = {Won, William and Heo, Taekyung and Rashidi, Saeed and Sridharan, Srinivas and Srinivasan, Sudarshan and Krishna, Tushar},
  title     = {{ASTRA-sim2.0}: Modeling Hierarchical Networks and Disaggregated Systems for Large-model Training at Scale},
  booktitle = {IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2023},
  doi       = {10.1109/ISPASS57527.2023.00035},
}
@inproceedings{calculon2023,
  author    = {Isaev, Mikhail and McDonald, Nic and Dennison, Larry and Vuduc, Richard},
  title     = {{Calculon}: A Methodology and Tool for High-Level Co-Design of Systems and Large Language Models},
  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC)},
  year      = {2023},
  doi       = {10.1145/3581784.3607102},
}
@inproceedings{parashar2019timeloop,
  author    = {Parashar, Angshuman and Raina, Priyanka and Shao, Yakun Sophia and Chen, Yu-Hsin and Emer, Joel and others},
  title     = {Timeloop: A Systematic Approach to {DNN} Accelerator Evaluation},
  booktitle = {IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2019},
  doi       = {10.1109/ISPASS.2019.00042},
}
@inproceedings{wu2019accelergy,
  author    = {Wu, Yannan Nellie and Emer, Joel S. and Sze, Vivienne},
  title     = {Accelergy: An Architecture-Level Energy Estimation Methodology for Accelerator Designs},
  booktitle = {IEEE/ACM International Conference on Computer-Aided Design (ICCAD)},
  year      = {2019},
  doi       = {10.1109/ICCAD45719.2019.8942149},
}
@inproceedings{narayanan2021efficient,
  author        = {Narayanan, Deepak and Shoeybi, Mohammad and Casper, Jared and others},
  title         = {Efficient Large-Scale Language Model Training on {GPU} Clusters Using {Megatron-LM}},
  booktitle     = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC)},
  year          = {2021},
  doi           = {10.1145/3458817.3476209},
  internal-note = {NOTE(review): was typed @article with the proceedings name in journal; retyped as @inproceedings and DOI added -- verify DOI against the ACM DL record},
}
@article{shazeer2017outrageously,
  author  = {Shazeer, Noam and Mirhoseini, Azalia and Maziarz, Krzysztof and others},
  title   = {Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer},
  journal = {arXiv preprint arXiv:1701.06538},
  year    = {2017},
}