% Dahl et al., "Benchmarking Neural Network Training Algorithms" (arXiv preprint).
% The author list and citation key identify this work; fields come from the arXiv
% record. Do not re-resolve by Crossref title search: it matches an unrelated
% neurology article.
@misc{dahl2023benchmarking,
  author        = {Dahl, George E. and Schneider, Frank and Nado, Zachary and Agarwal, Naman and Sastry, Chandramouli Shama and Hennig, Philipp and Medapati, Sourabh and Eschenhagen, Runa and Kasimbeg, Priya and Suo, Daniel and others},
  title         = {Benchmarking Neural Network Training Algorithms},
  year          = {2023},
  archiveprefix = {arXiv},
  eprint        = {2306.07179},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/2306.07179}
}

% Kingma and Ba, "Adam: A Method for Stochastic Optimization" (ICLR 2015 conference track).
% Record exported from dblp; timestamp/biburl/bibsource are dblp bookkeeping fields.
@inproceedings{diederik2015adam,
  author    = {Diederik P. Kingma and Jimmy Ba},
  editor    = {Yoshua Bengio and Yann LeCun},
  title     = {Adam: {A} Method for Stochastic Optimization},
  booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
  year      = {2015},
  url       = {http://arxiv.org/abs/1412.6980},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/KingmaB14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Glorot and Bengio, AISTATS 2010 (PMLR volume 9).
% Editor, series, volume, pages and publisher follow the PMLR landing page given in `url`.
@inproceedings{glorot2010understanding,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  editor    = {Teh, Yee Whye and Titterington, Mike},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics},
  series    = {Proceedings of Machine Learning Research},
  volume    = {9},
  pages     = {249--256},
  publisher = {PMLR},
  year      = {2010},
  url       = {https://proceedings.mlr.press/v9/glorot10a.html}
}

% Lecture material by Hinton. Standard styles do not print an `institution` field
% for misc entries, so the university is carried inside `howpublished` to make
% sure it appears in the rendered reference.
@misc{hinton2017overview,
  author       = {Hinton, Geoffrey},
  title        = {Overview of Minibatch Gradient Descent},
  howpublished = {University Lecture, University of Toronto},
  year         = {2017}
}

% Snoek, Larochelle and Adams, "Practical Bayesian Optimization of Machine Learning
% Algorithms" (NIPS 2012). Record exported from dblp.
@inproceedings{jasper2012practical,
  author    = {Jasper Snoek and Hugo Larochelle and Ryan P. Adams},
  editor    = {Peter L. Bartlett and Fernando C. N. Pereira and Christopher J. C. Burges and L{\'{e}}on Bottou and Kilian Q. Weinberger},
  title     = {Practical Bayesian Optimization of Machine Learning Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  pages     = {2960--2968},
  year      = {2012},
  url       = {https://proceedings.neurips.cc/paper/2012/hash/05311655a15b75fab86956663e1819cd-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/SnoekLA12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Duchi, Hazan and Singer, "Adaptive Subgradient Methods for Online Learning and
% Stochastic Optimization" (COLT 2010). Record exported from dblp.
% The `\#` in the URL is a LaTeX-escaped fragment marker; keep it escaped.
@inproceedings{john2010adaptive,
  author    = {John C. Duchi and Elad Hazan and Yoram Singer},
  editor    = {Adam Tauman Kalai and Mehryar Mohri},
  title     = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  booktitle = {{COLT} 2010 - The 23rd Conference on Learning Theory, Haifa, Israel, June 27-29, 2010},
  pages     = {257--269},
  publisher = {Omnipress},
  year      = {2010},
  url       = {http://colt2010.haifa.il.ibm.com/papers/COLT2010proceedings.pdf\#page=265},
  timestamp = {Tue, 19 Feb 2013 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/colt/DuchiHS10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% He, Zhang, Ren and Sun, ICCV 2015. Record exported from dblp.
% `ImageNet` is brace-protected so sentence-casing styles do not lower-case it.
@inproceedings{kaiming2015delving,
  author    = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  title     = {Delving Deep into Rectifiers: Surpassing Human-Level Performance on {ImageNet} Classification},
  booktitle = {2015 {IEEE} International Conference on Computer Vision, {ICCV} 2015, Santiago, Chile, December 7-13, 2015},
  pages     = {1026--1034},
  publisher = {{IEEE} Computer Society},
  year      = {2015},
  doi       = {10.1109/ICCV.2015.123},
  url       = {https://doi.org/10.1109/ICCV.2015.123},
  timestamp = {Wed, 17 Apr 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iccv/HeZRS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Robbins and Monro, "A Stochastic Approximation Method" (Annals of Mathematical
% Statistics, 1951). Record sourced from Crossref.
@article{robbins1951stochastic,
  author    = {Robbins, Herbert and Monro, Sutton},
  title     = {A Stochastic Approximation Method},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {22},
  number    = {3},
  pages     = {400--407},
  year      = {1951},
  publisher = {Institute of Mathematical Statistics},
  issn      = {0003-4851},
  doi       = {10.1214/aoms/1177729586},
  url       = {https://doi.org/10.1214/aoms/1177729586},
  source    = {Crossref}
}

% Ruder, arXiv preprint 1609.04747.
% The arXiv identifier lives in the eprint fields (as for the other arXiv-only
% entries in this file) instead of being encoded as a pseudo journal/volume.
@misc{ruder2016overview,
  author        = {Ruder, Sebastian},
  title         = {An overview of gradient descent optimization algorithms},
  year          = {2016},
  archiveprefix = {arXiv},
  eprint        = {1609.04747},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1609.04747}
}

% Srivastava et al., Dropout paper (JMLR volume 15, 2014).
% Journal name is stored in full so the bibliography style decides on abbreviation;
% volume/number/pages follow the JMLR record linked in `url`.
@article{srivastava2014dropout,
  author  = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title   = {Dropout: {A} Simple Way to Prevent Neural Networks from Overfitting},
  journal = {Journal of Machine Learning Research},
  volume  = {15},
  number  = {56},
  pages   = {1929--1958},
  year    = {2014},
  url     = {http://jmlr.org/papers/v15/srivastava14a.html}
}

% Hoefler et al., "Sparsity in Deep Learning" (arXiv preprint 2102.00554, cs.LG).
@misc{torsten2021sparsity,
  author        = {Torsten Hoefler and Dan Alistarh and Tal Ben-Nun and Nikoli Dryden and Alexandra Peste},
  title         = {Sparsity in Deep Learning: Pruning and growth for efficient inference and training in neural networks},
  year          = {2021},
  eprint        = {2102.00554},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}

% Same work as the entry keyed you2018imagenet (identical arXiv eprint 1709.05011).
% Both keys are kept so existing citations keep resolving; keep the two records in
% sync. `ImageNet` is brace-protected so both keys render the title identically.
@misc{yang2018imagenet,
  author        = {You, Yang and Zhang, Zhao and Hsieh, Cho-Jui and Demmel, James and Keutzer, Kurt},
  title         = {{ImageNet} Training in Minutes},
  year          = {2018},
  archiveprefix = {arXiv},
  eprint        = {1709.05011},
  primaryclass  = {cs.CV}
}

% You et al., "ImageNet Training in Minutes" (arXiv preprint 1709.05011, cs.CV).
% NOTE(review): the entry keyed yang2018imagenet carries the same eprint id, i.e. the
% same work under a second key; cite one key consistently within a document.
@misc{you2018imagenet,
  author        = {You, Yang and Zhang, Zhao and Hsieh, Cho-Jui and Demmel, James and Keutzer, Kurt},
  title         = {{ImageNet} Training in Minutes},
  year          = {2018},
  eprint        = {1709.05011},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV}
}

% Zeiler, ADADELTA preprint (arXiv 1212.5701).
% The citation key is historical and kept for compatibility with existing
% citations. Author and eprint identify this work; do not re-resolve by Crossref
% search, which matches an unrelated Wiley book chapter.
@misc{zeiler2012reinforcement,
  author        = {Zeiler, Matthew D.},
  title         = {{ADADELTA}: An Adaptive Learning Rate Method},
  year          = {2012},
  archiveprefix = {arXiv},
  eprint        = {1212.5701},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1212.5701}
}

% Zoph and Le, Neural Architecture Search preprint (arXiv 1611.01578, posted 2016).
% The citation key is historical and kept for compatibility with existing
% citations. Author and eprint identify this work; do not re-resolve by Crossref
% search, which matches an unrelated 2023 Wiley book chapter.
@misc{zoph2023cybernetical,
  author        = {Zoph, Barret and Le, Quoc V.},
  title         = {Neural Architecture Search with Reinforcement Learning},
  year          = {2016},
  archiveprefix = {arXiv},
  eprint        = {1611.01578},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1611.01578}
}