% Bibliography database: optimisation and deep-learning references.
% Layout: one entry per work, one field per line, names in "Last, First" form.
% Citation keys are kept exactly as they were so existing citations keep working,
% even where a key was generated from a first name or a wrong title or year.

% Benchmarking paper by Dahl et al. The earlier record carried the title, journal
% and DOI of an unrelated neurology article; it now points at the arXiv preprint.
@misc{dahl2023benchmarking,
  author        = {Dahl, George E. and Schneider, Frank and Nado, Zachary and Agarwal, Naman and Sastry, Chandramouli Shama and Hennig, Philipp and Medapati, Sourabh and Eschenhagen, Runa and Kasimbeg, Priya and Suo, Daniel and others},
  title         = {Benchmarking Neural Network Training Algorithms},
  year          = {2023},
  archiveprefix = {arXiv},
  eprint        = {2306.07179},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/2306.07179}
}

@inproceedings{diederik2015adam,
  author    = {Kingma, Diederik P. and Ba, Jimmy},
  title     = {Adam: {A} Method for Stochastic Optimization},
  booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
  editor    = {Bengio, Yoshua and LeCun, Yann},
  year      = {2015},
  url       = {http://arxiv.org/abs/1412.6980},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/KingmaB14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{glorot2010understanding,
  author    = {Glorot, Xavier and Bengio, Yoshua},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics},
  series    = {Proceedings of Machine Learning Research},
  volume    = {9},
  pages     = {249--256},
  year      = {2010},
  url       = {https://proceedings.mlr.press/v9/glorot10a.html}
}

@misc{hinton2017overview,
  author       = {Hinton, Geoffrey},
  title        = {Overview of Minibatch Gradient Descent},
  howpublished = {University Lecture},
  institution  = {University of Toronto},
  year         = {2017}
}

@inproceedings{jasper2012practical,
  author    = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan P.},
  title     = {Practical {Bayesian} Optimization of Machine Learning Algorithms},
  booktitle = {Advances in Neural Information Processing Systems 25: 26th Annual Conference on Neural Information Processing Systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, United States},
  editor    = {Bartlett, Peter L. and Pereira, Fernando C. N. and Burges, Christopher J. C. and Bottou, L{\'{e}}on and Weinberger, Kilian Q.},
  pages     = {2960--2968},
  year      = {2012},
  url       = {https://proceedings.neurips.cc/paper/2012/hash/05311655a15b75fab86956663e1819cd-Abstract.html},
  timestamp = {Thu, 21 Jan 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/SnoekLA12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{john2010adaptive,
  author    = {Duchi, John C. and Hazan, Elad and Singer, Yoram},
  title     = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  booktitle = {{COLT} 2010 - The 23rd Conference on Learning Theory, Haifa, Israel, June 27-29, 2010},
  editor    = {Kalai, Adam Tauman and Mohri, Mehryar},
  pages     = {257--269},
  publisher = {Omnipress},
  year      = {2010},
  url       = {http://colt2010.haifa.il.ibm.com/papers/COLT2010proceedings.pdf\#page=265},
  timestamp = {Tue, 19 Feb 2013 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/colt/DuchiHS10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{kaiming2015delving,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Delving Deep into Rectifiers: Surpassing Human-Level Performance on {ImageNet} Classification},
  booktitle = {2015 {IEEE} International Conference on Computer Vision, {ICCV} 2015, Santiago, Chile, December 7-13, 2015},
  pages     = {1026--1034},
  publisher = {{IEEE} Computer Society},
  year      = {2015},
  doi       = {10.1109/ICCV.2015.123},
  url       = {https://doi.org/10.1109/ICCV.2015.123},
  timestamp = {Wed, 17 Apr 2019 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iccv/HeZRS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{robbins1951stochastic,
  author    = {Robbins, Herbert and Monro, Sutton},
  title     = {A Stochastic Approximation Method},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {22},
  number    = {3},
  pages     = {400--407},
  publisher = {Institute of Mathematical Statistics},
  year      = {1951},
  issn      = {0003-4851},
  doi       = {10.1214/aoms/1177729586},
  url       = {https://doi.org/10.1214/aoms/1177729586},
  source    = {Crossref}
}

% arXiv-only work: the identifier lives in the eprint fields, as in the other
% preprint entries of this file, not in journal/volume.
@misc{ruder2016overview,
  author        = {Ruder, Sebastian},
  title         = {An overview of gradient descent optimization algorithms},
  year          = {2016},
  archiveprefix = {arXiv},
  eprint        = {1609.04747},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1609.04747}
}

@article{srivastava2014dropout,
  author  = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title   = {Dropout: {A} Simple Way to Prevent Neural Networks from Overfitting},
  journal = {Journal of Machine Learning Research},
  volume  = {15},
  number  = {56},
  pages   = {1929--1958},
  year    = {2014},
  url     = {http://jmlr.org/papers/v15/srivastava14a.html}
}

@misc{torsten2021sparsity,
  author        = {Hoefler, Torsten and Alistarh, Dan and Ben-Nun, Tal and Dryden, Nikoli and Peste, Alexandra},
  title         = {Sparsity in Deep Learning: Pruning and growth for efficient inference and training in neural networks},
  year          = {2021},
  archiveprefix = {arXiv},
  eprint        = {2102.00554},
  primaryclass  = {cs.LG}
}

% NOTE(review): the next two entries describe the same work (same eprint id).
% Both keys are kept because either may be cited; their content is identical.
% Prefer one key in new text and retire the other once no document cites it.
@misc{yang2018imagenet,
  author        = {You, Yang and Zhang, Zhao and Hsieh, Cho-Jui and Demmel, James and Keutzer, Kurt},
  title         = {{ImageNet} Training in Minutes},
  year          = {2018},
  archiveprefix = {arXiv},
  eprint        = {1709.05011},
  primaryclass  = {cs.CV}
}

@misc{you2018imagenet,
  author        = {You, Yang and Zhang, Zhao and Hsieh, Cho-Jui and Demmel, James and Keutzer, Kurt},
  title         = {{ImageNet} Training in Minutes},
  year          = {2018},
  archiveprefix = {arXiv},
  eprint        = {1709.05011},
  primaryclass  = {cs.CV}
}

% The author and eprint id identify the ADADELTA preprint. The earlier title,
% DOI, ISBN, pages and publisher came from an unrelated book chapter and were
% removed. The key still carries the wrong title word; it is kept for compatibility.
@misc{zeiler2012reinforcement,
  author        = {Zeiler, Matthew D.},
  title         = {{ADADELTA}: An Adaptive Learning Rate Method},
  year          = {2012},
  archiveprefix = {arXiv},
  eprint        = {1212.5701},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1212.5701}
}

% The authors and eprint id identify the neural architecture search preprint
% (eprint id prefix 1611, i.e. November 2016). The earlier title, DOI, ISBN,
% pages, publisher and year came from an unrelated book chapter and were removed.
% The key still carries the wrong year and title word; it is kept for compatibility.
@misc{zoph2023cybernetical,
  author        = {Zoph, Barret and Le, Quoc V.},
  title         = {Neural Architecture Search with Reinforcement Learning},
  year          = {2016},
  archiveprefix = {arXiv},
  eprint        = {1611.01578},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1611.01578}
}