@comment{
  Mirror of https://github.com/harvard-edge/cs249r_book.git
  (synced 2026-04-30 09:38:38 -05:00).
  Bibliography file; reformatting spans Volume I chapters as part of the
  volume restructure work. See repository commit history for details.
}
@article{alexnet2012,
  title = {{ImageNet} classification with deep convolutional neural networks},
  author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
  journal = {Communications of the ACM},
  publisher = {Association for Computing Machinery (ACM)},
  volume = {60},
  number = {6},
  pages = {84--90},
  doi = {10.1145/3065386},
  issn = {0001-0782,1557-7317},
  source = {Crossref},
  date = {2017-05-24},
}

@article{annette2020,
  title = {{ANNETTE}: Accurate Neural Network Execution Time Estimation With Stacked Models},
  author = {Wess, Matthias and Ivanov, Matvey and Unger, Christoph and Nookala, Anvesh
            and Wendt, Alexander and Jantsch, Axel},
  journal = {IEEE Access},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  volume = {9},
  pages = {3545--3556},
  doi = {10.1109/access.2020.3047259},
  issn = {2169-3536},
  source = {Crossref},
  date = {2021},
}

@article{ba2014deep,
  title = {Do Deep Nets Really Need to be Deep?},
  author = {Ba, Jimmy and Caruana, Rich},
  year = {2014},
  journal = {Advances in Neural Information Processing Systems (NeurIPS)},
  volume = {27},
}

@article{banbury2020benchmarking,
  title = {Benchmarking {TinyML} Systems: Challenges and Direction},
  author = {Banbury, Colby R. and Reddi, Vijay Janapa and Lam, Max and Fu, William
            and Fazel, Amin and Holleman, Jeremy and Huang, Xinyuan and Hurtado, Robert
            and Kanter, David and Lokhmotov, Anton and Patterson, David and Pau, Danilo
            and Seo, Jae-sun and Sieracki, Jeff and Thakker, Urmish and Verhelst, Marian
            and Yadav, Poonam},
  journal = {arXiv preprint arXiv:2003.04821},
  url = {http://arxiv.org/abs/2003.04821v4},
  date = {2020-03-10},
  eprint = {2003.04821},
  archiveprefix = {arXiv},
  primaryclass = {cs.PF},
}

@article{Bellec2018,
  title = {Memory-Efficient Deep Learning on a {SpiNNaker} 2 Prototype},
  author = {Liu, Chen and Bellec, Guillaume and Vogginger, Bernhard and Kappel, David
            and Partzsch, Johannes and Neum{\"a}rker, Felix and H{\"o}ppner, Sebastian
            and Maass, Wolfgang and Furber, Steve B. and Legenstein, Robert
            and Mayr, Christian G.},
  journal = {Frontiers in Neuroscience},
  publisher = {Frontiers Media SA},
  volume = {12},
  pages = {840},
  doi = {10.3389/fnins.2018.00840},
  issn = {1662-453X},
  source = {Crossref},
  date = {2018-11-16},
}
|
||
|
||
@article{Bengio2013,
  title = {Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation},
  author = {Bengio, Yoshua and L{\'e}onard, Nicholas and Courville, Aaron},
  journal = {arXiv preprint arXiv:1308.3432},
  url = {http://arxiv.org/abs/1308.3432v1},
  date = {2013-08-15},
  eprint = {1308.3432},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
  internal-note = {NOTE(review): duplicate of entry bengio2013estimating -- consider consolidating to one key},
}

@article{bengio2013estimating,
  title = {Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation},
  author = {Bengio, Yoshua and L{\'e}onard, Nicholas and Courville, Aaron},
  journal = {arXiv preprint arXiv:1308.3432},
  url = {http://arxiv.org/abs/1308.3432v1},
  date = {2013-08-15},
  eprint = {1308.3432},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
  internal-note = {NOTE(review): duplicate of entry Bengio2013 -- consider consolidating to one key},
}

@article{bengio2015conditional,
  title = {Conditional Computation in Neural Networks for faster models},
  author = {Bengio, Emmanuel and Bacon, Pierre-Luc and Pineau, Joelle and Precup, Doina},
  journal = {arXiv preprint arXiv:1511.06297},
  url = {http://arxiv.org/abs/1511.06297v2},
  date = {2015-11-19},
  eprint = {1511.06297},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}

@article{Bergstra2011,
  title = {Adaptive {MCMC} with online relabeling},
  author = {Bardenet, R{\'e}mi and Capp{\'e}, Olivier and Fort, Gersende and K{\'e}gl, Bal{\'a}zs},
  journal = {Bernoulli},
  publisher = {Bernoulli Society for Mathematical Statistics and Probability},
  volume = {21},
  number = {3},
  doi = {10.3150/13-bej578},
  issn = {1350-7265},
  source = {Crossref},
  date = {2015-08-01},
  internal-note = {NOTE(review): key Bergstra2011 suggests Bergstra et al. (e.g. hyper-parameter optimization, NeurIPS 2011), but the content is Bardenet et al. -- verify intended reference},
}

@phdthesis{blalock2020state,
  title = {Neural Network Pruning for {ECG} Arrhythmia Classification},
  author = {Labarge, Isaac E.},
  school = {California Polytechnic State University},
  year = {2020},
  doi = {10.15368/theses.2020.76},
  source = {Crossref},
  internal-note = {NOTE(review): key blalock2020state suggests Blalock et al., "What is the State of Neural Network Pruning?" (MLSys 2020), but the content is the Labarge thesis; conflicting journal/volume/pages fields were dropped -- verify intended reference},
}

@article{brown2020gpt3,
  title = {Language Models are Few-Shot Learners},
  author = {Brown, Tom B. and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie
            and Kaplan, Jared and Dhariwal, Prafulla and Neelakantan, Arvind
            and Shyam, Pranav and Sastry, Girish and Agarwal, Sandhini and others},
  year = {2020},
  journal = {Advances in Neural Information Processing Systems},
  volume = {33},
  pages = {1877--1901},
}
|
||
|
||
@inproceedings{cai2018proxylessnas,
  title = {{ProxylessNAS}: Direct Neural Architecture Search on Target Task and Hardware},
  author = {Cai, Han and Zhu, Ligeng and Han, Song},
  year = {2019},
  booktitle = {7th International Conference on Learning Representations, {ICLR} 2019,
               New Orleans, LA, USA, May 6-9, 2019},
  publisher = {OpenReview.net},
  url = {https://openreview.net/forum?id=HylVB3AqYm},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/conf/iclr/CaiZH19.bib},
  timestamp = {Tue, 24 Nov 2020 00:00:00 +0100},
}

@inproceedings{Cai2020,
  title = {Once-for-All: Train One Network and Specialize it for Efficient Deployment},
  author = {Cai, Han and Gan, Chuang and Han, Song},
  year = {2020},
  booktitle = {International Conference on Learning Representations},
}

@article{chen2016training,
  title = {{Kodaikanal} Digitized White-light Data Archive (1921-2011): Analysis of various solar cycle features},
  author = {Mandal, Sudip and Hegde, Manjunath and Samanta, Tanmoy and Hazra, Gopal
            and Banerjee, Dipankar and Ravindra, B.},
  journal = {arXiv preprint arXiv:1608.04665},
  url = {http://arxiv.org/abs/1608.04665v2},
  date = {2016-08-16},
  eprint = {1608.04665},
  archiveprefix = {arXiv},
  primaryclass = {astro-ph.SR},
  internal-note = {NOTE(review): key chen2016training suggests Chen et al., "Training Deep Nets with Sublinear Memory Cost" (arXiv:1604.06174), but the content is a solar-physics paper -- verify intended reference},
}

@article{Cheng2022,
  title = {Memory-Efficient Deep Learning: Advances in Model Compression and Sparsification},
  author = {Cheng, Yu and others},
  year = {2022},
  journal = {ACM Computing Surveys},
}

@article{Choi2019,
  title = {{PACT}: Parameterized Clipping Activation for Quantized Neural Networks},
  author = {Choi, Jungwook and Wang, Zhuo and Venkataramani, Swagath
            and Chuang, Pierce I-Jen and Srinivasan, Vijayalakshmi
            and Gopalakrishnan, Kailash},
  journal = {arXiv preprint arXiv:1805.06085},
  url = {http://arxiv.org/abs/1805.06085v2},
  date = {2018-05-16},
  eprint = {1805.06085},
  archiveprefix = {arXiv},
  primaryclass = {cs.CV},
}
|
||
|
||
@article{choudhary2020comprehensive,
  title = {A comprehensive survey on model compression and acceleration},
  author = {Choudhary, Tejalal and Mishra, Vipul and Goswami, Anurag and Sarangapani, Jagannathan},
  journal = {Artificial Intelligence Review},
  publisher = {Springer Science and Business Media LLC},
  volume = {53},
  number = {7},
  pages = {5113--5155},
  doi = {10.1007/s10462-020-09816-7},
  issn = {0269-2821,1573-7462},
  source = {Crossref},
  date = {2020-02-08},
}

@article{choukroun2019low,
  title = {Differentiable Soft Quantization: Bridging Full-Precision and Low-Bit Neural Networks},
  author = {Gong, Ruihao and Liu, Xianglong and Jiang, Shenghu and Li, Tianxiang
            and Hu, Peng and Lin, Jiazhen and Yu, Fengwei and Yan, Junjie},
  journal = {arXiv preprint arXiv:1908.05033},
  url = {http://arxiv.org/abs/1908.05033v1},
  date = {2019-08-14},
  eprint = {1908.05033},
  archiveprefix = {arXiv},
  primaryclass = {cs.CV},
  internal-note = {NOTE(review): key choukroun2019low suggests Choukroun et al., "Low-bit Quantization of Neural Networks for Efficient Inference", but the content is Gong et al. (DSQ) -- verify intended reference},
}

@inproceedings{Chowdhery2021,
  title = {Edge {TPU}: An Edge-Optimized Inference Accelerator for Deep Learning},
  author = {Chowdhery, Aakanksha and Noy, Anatoli and Misra, Gaurav and Dai, Zhuyun
            and Le, Quoc V. and Dean, Jeff},
  year = {2021},
  booktitle = {International Symposium on Computer Architecture},
}

@article{chowdhery2022palm,
  title = {{PaLM}: Scaling Language Modeling with Pathways},
  author = {Chowdhery, Aakanksha and Narang, Sharan and Devlin, Jacob and Bosma, Maarten
            and Mishra, Gaurav and Roberts, Adam and Barham, Paul and Chung, Hyung Won
            and Sutton, Charles and Gehrmann, Sebastian and Schuh, Parker and Shi, Kensen
            and Tsvyashchenko, Sasha and Maynez, Joshua and Rao, Abhishek and Barnes, Parker
            and Tay, Yi and Shazeer, Noam and Prabhakaran, Vinodkumar and Reif, Emily
            and Du, Nan and Hutchinson, Ben and Pope, Reiner and Bradbury, James
            and Austin, Jacob and Isard, Michael and Gur-Ari, Guy and Yin, Pengcheng
            and Duke, Toju and Levskaya, Anselm and Ghemawat, Sanjay and Dev, Sunipa
            and Michalewski, Henryk and Garcia, Xavier and Misra, Vedant and Robinson, Kevin
            and Fedus, Liam and Zhou, Denny and Ippolito, Daphne and Luan, David
            and Lim, Hyeontaek and Zoph, Barret and Spiridonov, Alexander and Sepassi, Ryan
            and Dohan, David and Agrawal, Shivani and Omernick, Mark and Dai, Andrew M.
            and Pillai, Thanumalayan Sankaranarayana and Pellat, Marie and Lewkowycz, Aitor
            and Moreira, Erica and Child, Rewon and Polozov, Oleksandr and Lee, Katherine
            and Zhou, Zongwei and Wang, Xuezhi and Saeta, Brennan and Diaz, Mark
            and Firat, Orhan and Catasta, Michele and Wei, Jason and Meier-Hellstern, Kathy
            and Eck, Douglas and Dean, Jeff and Petrov, Slav and Fiedel, Noah},
  journal = {arXiv preprint arXiv:2204.02311},
  url = {http://arxiv.org/abs/2204.02311v5},
  date = {2022-04-05},
  eprint = {2204.02311},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
}
|
||
|
||
@inproceedings{chu2021discovering,
  title = {Discovering Multi-Hardware Mobile Models via Architecture Search},
  author = {Chu, Grace and Arikan, Okan and Bender, Gabriel and Wang, Weijun
            and Brighton, Achille and Kindermans, Pieter-Jan and Liu, Hanxiao
            and Akin, Berkin and Gupta, Suyog and Howard, Andrew},
  booktitle = {2021 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
  publisher = {IEEE},
  pages = {3016--3025},
  doi = {10.1109/cvprw53098.2021.00337},
  source = {Crossref},
  date = {2021-06},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/conf/cvpr/ChuABWBKLAG021.bib},
  timestamp = {Mon, 18 Jul 2022 01:00:00 +0200},
}

@article{Courbariaux2016,
  title = {{BinaryConnect}: Training Deep Neural Networks with Binary Weights during Propagations},
  author = {Courbariaux, Matthieu and Bengio, Yoshua and David, Jean-Pierre},
  year = {2015},
  journal = {Advances in Neural Information Processing Systems (NeurIPS)},
  volume = {28},
  pages = {3123--3131},
  internal-note = {NOTE(review): year corrected from 2016 to 2015 -- NeurIPS volume 28 is the 2015 proceedings},
}

@inproceedings{Cubuk2019,
  title = {{AutoAugment}: Learning Augmentation Strategies From Data},
  author = {Cubuk, Ekin D. and Zoph, Barret and Man{\'e}, Dandelion and Vasudevan, Vijay
            and Le, Quoc V.},
  booktitle = {2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  publisher = {IEEE},
  pages = {113--123},
  doi = {10.1109/cvpr.2019.00020},
  source = {Crossref},
  date = {2019-06},
}

@article{dao2022monarchexpressivestructuredmatrices,
  title = {{Monarch}: Expressive Structured Matrices for Efficient and Accurate Training},
  author = {Dao, Tri and Chen, Beidi and Sohoni, Nimit and Desai, Arjun and Poli, Michael
            and Grogan, Jessica and Liu, Alexander and Rao, Aniruddh and Rudra, Atri
            and R{\'e}, Christopher},
  journal = {arXiv preprint arXiv:2204.00595},
  url = {http://arxiv.org/abs/2204.00595v1},
  date = {2022-04-01},
  eprint = {2204.00595},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}

@article{Davies2021,
  title = {Advancing Neuromorphic Computing with Sparse Networks},
  author = {Davies, Mike and others},
  year = {2021},
  journal = {Nature Electronics},
}

@article{dean2018new,
  title = {A New Golden Age in Computer Architecture: Empowering the Machine-Learning Revolution},
  author = {Dean, Jeff and Patterson, David and Young, Cliff},
  journal = {IEEE Micro},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  volume = {38},
  number = {2},
  pages = {21--29},
  doi = {10.1109/mm.2018.112130030},
  issn = {0272-1732,1937-4143},
  source = {Crossref},
  date = {2018-03},
}
|
||
|
||
@inproceedings{Denton2014,
  title = {Exploiting Linear Structure Within Convolutional Networks for Efficient Evaluation},
  author = {Denton, Emily L. and Chintala, Soumith and Fergus, Rob},
  year = {2014},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  pages = {1269--1277},
}

@article{dettmers2019sparse,
  title = {Sparse Networks from Scratch: Faster Training without Losing Performance},
  author = {Dettmers, Tim and Zettlemoyer, Luke},
  journal = {arXiv preprint arXiv:1907.04840},
  url = {http://arxiv.org/abs/1907.04840v2},
  date = {2019-07-10},
  eprint = {1907.04840},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}

@article{devlin2018bert,
  title = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  journal = {arXiv preprint arXiv:1810.04805},
  url = {http://arxiv.org/abs/1810.04805v2},
  date = {2018-10-11},
  eprint = {1810.04805},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
}

@inproceedings{dong2022splitnets,
  title = {{SplitNets}: Designing Neural Architectures for Efficient Distributed Computing
           on Head-Mounted Systems},
  author = {Dong, Xin and De Salvo, Barbara and Li, Meng and Liu, Chiao and Qu, Zhongnan
            and Kung, H. T. and Li, Ziyun},
  booktitle = {2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  publisher = {IEEE},
  pages = {12549--12559},
  doi = {10.1109/cvpr52688.2022.01223},
  source = {Crossref},
  date = {2022-06},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/conf/cvpr/DongSLLQ0L22.bib},
  timestamp = {Sun, 22 Jan 2023 00:00:00 +0100},
}

@article{dynamicpruning2023,
  title = {A Dynamic Pruning Method on Multiple Sparse Structures in Deep Neural Networks},
  author = {Hu, Jie and Lin, Peng and Zhang, Huajun and Lan, Zining and Chen, Wenxin
            and Xie, Kailiang and Chen, Siyun and Wang, Hao and Chang, Sheng},
  journal = {IEEE Access},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  volume = {11},
  pages = {38448--38457},
  doi = {10.1109/access.2023.3267469},
  issn = {2169-3536},
  source = {Crossref},
  date = {2023},
  keywords = {Sparse matrices;Filtering theory;Training;Information filters;Neural
              networks;Tensors;Deep learning;Convolutional neural networks;dynamic
              channel pruning;network compression and acceleration;structured pruning},
}
|
||
|
||
@inproceedings{elsen2020fast,
  title = {Fast Sparse {ConvNets}},
  author = {Elsen, Erich and Dukhan, Marat and Gale, Trevor and Simonyan, Karen},
  booktitle = {2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  publisher = {IEEE},
  pages = {14617--14626},
  doi = {10.1109/cvpr42600.2020.01464},
  source = {Crossref},
  date = {2020-06},
}

@incollection{Elsken2019,
  title = {Neural Architecture Search},
  author = {Elsken, Thomas and Metzen, Jan Hendrik and Hutter, Frank},
  booktitle = {Automated Machine Learning},
  publisher = {Springer International Publishing},
  pages = {63--77},
  doi = {10.1007/978-3-030-05318-5_3},
  isbn = {9783030053178,9783030053185},
  issn = {2520-131X,2520-1328},
  source = {Crossref},
  date = {2019},
  internal-note = {NOTE(review): duplicate of entry elsken2019neural -- consider consolidating to one key},
}

@incollection{elsken2019neural,
  title = {Neural Architecture Search},
  author = {Elsken, Thomas and Metzen, Jan Hendrik and Hutter, Frank},
  booktitle = {Automated Machine Learning},
  publisher = {Springer International Publishing},
  pages = {63--77},
  doi = {10.1007/978-3-030-05318-5_3},
  isbn = {9783030053178,9783030053185},
  issn = {2520-131X,2520-1328},
  source = {Crossref},
  date = {2019},
  internal-note = {NOTE(review): duplicate of entry Elsken2019; the stray JMLR journal/volume/number fields (vol. 20, no. 55) from the survey version were dropped -- verify which version is intended},
}

@article{fahim2021hls4ml,
  title = {{hls4ml}: An Open-Source Codesign Workflow to Empower Scientific Low-Power
           Machine Learning Devices},
  author = {Fahim, Farah and Hawks, Benjamin and Herwig, Christian and Hirschauer, James
            and Jindariani, Sergo and Tran, Nhan and Carloni, Luca P.
            and Guglielmo, Giuseppe Di and Harris, Philip and Krupa, Jeffrey
            and Rankin, Dylan and Valentin, Manuel Blanco and Hester, Josiah
            and Luo, Yingyi and Mamish, John and Orgrenci-Memik, Seda and Aarrestad, Thea
            and Javed, Hamza and Loncar, Vladimir and Pierini, Maurizio
            and Pol, Adrian Alan and Summers, Sioni and Duarte, Javier and Hauck, Scott
            and Hsu, Shih-Chieh and Ngadiuba, Jennifer and Liu, Mia and Hoang, Duc
            and Kreinar, Edward and Wu, Zhenbin},
  journal = {arXiv preprint arXiv:2103.05579},
  url = {http://arxiv.org/abs/2103.05579v3},
  date = {2021-03-09},
  eprint = {2103.05579},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}

@article{fedus2021switch,
  title = {Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity},
  author = {Fedus, William and Zoph, Barret and Shazeer, Noam},
  year = {2021},
  journal = {Journal of Machine Learning Research},
}

@incollection{Feurer2015,
  title = {{Auto-sklearn}: Efficient and Robust Automated Machine Learning},
  author = {Feurer, Matthias and Klein, Aaron and Eggensperger, Katharina
            and Springenberg, Jost Tobias and Blum, Manuel and Hutter, Frank},
  booktitle = {Automated Machine Learning},
  publisher = {Springer International Publishing},
  pages = {113--134},
  doi = {10.1007/978-3-030-05318-5_6},
  isbn = {9783030053178,9783030053185},
  issn = {2520-131X,2520-1328},
  source = {Crossref},
  date = {2019},
}
|
||
|
||
@article{frankle2018lottery,
  title = {The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks},
  author = {Frankle, Jonathan and Carbin, Michael},
  journal = {arXiv preprint arXiv:1803.03635},
  url = {http://arxiv.org/abs/1803.03635v5},
  date = {2018-03-09},
  eprint = {1803.03635},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}

@article{gale2019state,
  title = {The State of Sparsity in Deep Neural Networks},
  author = {Gale, Trevor and Elsen, Erich and Hooker, Sara},
  journal = {arXiv preprint arXiv:1902.09574},
  url = {http://arxiv.org/abs/1902.09574v1},
  date = {2019-02-25},
  eprint = {1902.09574},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
  internal-note = {NOTE(review): duplicate of entry gale2020sparse -- consider consolidating to one key},
}

@article{Gale2020,
  title = {On the {Bauer-Furuta} and {Seiberg-Witten} invariants of families of $4$-manifolds},
  author = {Baraglia, David and Konno, Hokuto},
  journal = {arXiv preprint arXiv:1903.01649},
  url = {http://arxiv.org/abs/1903.01649v3},
  date = {2019-03-05},
  eprint = {1903.01649},
  archiveprefix = {arXiv},
  primaryclass = {math.DG},
  internal-note = {NOTE(review): key Gale2020 suggests a Gale et al. machine-learning paper, but the content is a differential-geometry preprint; the stray ICML booktitle/pages fields were dropped -- verify intended reference},
}

@article{gale2020sparse,
  title = {The State of Sparsity in Deep Neural Networks},
  author = {Gale, Trevor and Elsen, Erich and Hooker, Sara},
  journal = {arXiv preprint arXiv:1902.09574},
  url = {http://arxiv.org/abs/1902.09574v1},
  date = {2019-02-25},
  eprint = {1902.09574},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
  internal-note = {NOTE(review): duplicate of entry gale2019state -- consider consolidating to one key},
}

@article{gale2022megablocksefficientsparsetraining,
  title = {{MegaBlocks}: Efficient Sparse Training with Mixture-of-Experts},
  author = {Gale, Trevor and Narayanan, Deepak and Young, Cliff and Zaharia, Matei},
  journal = {arXiv preprint arXiv:2211.15841},
  url = {http://arxiv.org/abs/2211.15841v1},
  date = {2022-11-29},
  eprint = {2211.15841},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}

@article{Gholami2021,
  title = {A Survey of Quantization Methods for Efficient Neural Network Inference},
  author = {Gholami, Amir and Kim, Sehoon and Dong, Zhen and Yao, Zhewei
            and Mahoney, Michael W. and Keutzer, Kurt},
  journal = {arXiv preprint arXiv:2103.13630},
  url = {http://arxiv.org/abs/2103.13630v3},
  date = {2021-03-25},
  eprint = {2103.13630},
  archiveprefix = {arXiv},
  primaryclass = {cs.CV},
  internal-note = {NOTE(review): duplicate of entry gholami2021survey -- consider consolidating to one key},
}

@article{gholami2021survey,
  title = {A Survey of Quantization Methods for Efficient Neural Network Inference},
  author = {Gholami, Amir and Kim, Sehoon and Dong, Zhen and Yao, Zhewei
            and Mahoney, Michael W. and Keutzer, Kurt},
  journal = {arXiv preprint arXiv:2103.13630},
  url = {http://arxiv.org/abs/2103.13630v3},
  date = {2021-03-25},
  eprint = {2103.13630},
  archiveprefix = {arXiv},
  primaryclass = {cs.CV},
  internal-note = {NOTE(review): duplicate of entry Gholami2021 -- consider consolidating to one key},
}
|
||
|
||
@inproceedings{gordon2018morphnet,
  title = {{MorphNet}: Fast \& Simple Resource-Constrained Structure Learning of Deep Networks},
  author = {Gordon, Ariel and Eban, Elad and Nachum, Ofir and Chen, Bo and Wu, Hao
            and Yang, Tien-Ju and Choi, Edward},
  booktitle = {2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  publisher = {IEEE},
  pages = {1586--1595},
  doi = {10.1109/cvpr.2018.00171},
  source = {Crossref},
  date = {2018-06},
}

@inproceedings{gordon2020compressing,
  title = {Compressing {BERT}: Studying the Effects of Weight Pruning on Transfer Learning},
  author = {Gordon, Mitchell and Duh, Kevin and Andrews, Nicholas},
  booktitle = {Proceedings of the 5th Workshop on Representation Learning for NLP},
  publisher = {Association for Computational Linguistics},
  doi = {10.18653/v1/2020.repl4nlp-1.18},
  source = {Crossref},
  date = {2020},
}

@article{gou2021knowledge,
  title = {Knowledge Distillation: A Survey},
  author = {Gou, Jianping and Yu, Baosheng and Maybank, Stephen J. and Tao, Dacheng},
  journal = {International Journal of Computer Vision},
  publisher = {Springer Science and Business Media LLC},
  volume = {129},
  number = {6},
  pages = {1789--1819},
  doi = {10.1007/s11263-021-01453-z},
  issn = {0920-5691,1573-1405},
  source = {Crossref},
  date = {2021-03-22},
}

@misc{gu2023deep,
  title = {Deep Learning Model Compression ({II})},
  author = {Gu, Ivy},
  year = {2023},
  howpublished = {Medium},
  url = {https://ivygdy.medium.com/deep-learning-model-compression-ii-546352ea9453},
  urldate = {2023-10-20},
}

@inproceedings{gupta2015deep,
  title = {Deep learning with limited numerical precision},
  author = {Gupta, Suyog and Agrawal, Ankur and Gopalakrishnan, Kailash and Narayanan, Pritish},
  year = {2015},
  booktitle = {International Conference on Machine Learning},
  pages = {1737--1746},
  organization = {PMLR},
}

@article{Han2015,
  title = {Learning both Weights and Connections for Efficient Neural Networks},
  author = {Han, Song and Pool, Jeff and Tran, John and Dally, William J.},
  journal = {CoRR},
  volume = {abs/1506.02626},
  pages = {1135--1143},
  url = {http://arxiv.org/abs/1506.02626v3},
  date = {2015-06-08},
  eprint = {1506.02626},
  archiveprefix = {arXiv},
  primaryclass = {cs.NE},
  source = {DBLP},
}
|
||
|
||
@article{han2015deep,
  title = {Deep Compression: Compressing Deep Neural Networks with Pruning,
           Trained Quantization and {Huffman} Coding},
  author = {Han, Song and Mao, Huizi and Dally, William J.},
  journal = {arXiv preprint arXiv:1510.00149},
  url = {http://arxiv.org/abs/1510.00149v5},
  date = {2015-10-01},
  eprint = {1510.00149},
  archiveprefix = {arXiv},
  primaryclass = {cs.CV},
}

@article{Han2016,
  title = {Deep Compression: Compressing Deep Neural Networks with Pruning,
           Trained Quantization and {Huffman} Coding},
  author = {Han, Song and Mao, Huizi and Dally, William J.},
  year = {2016},
  journal = {International Conference on Learning Representations (ICLR)},
  internal-note = {NOTE(review): duplicate of entry han2015deep (arXiv version); this one cites the ICLR 2016 version -- consider consolidating},
}

@article{hawks2021psandqs,
  title = {Ps and Qs: Quantization-Aware Pruning for Efficient Low Latency Neural Network Inference},
  author = {Hawks, Benjamin and Duarte, Javier and Fraser, Nicholas J.
            and Pappalardo, Alessandro and Tran, Nhan and Umuroglu, Yaman},
  journal = {Frontiers in Artificial Intelligence},
  publisher = {Frontiers Media SA},
  volume = {4},
  doi = {10.3389/frai.2021.676564},
  issn = {2624-8212},
  source = {Crossref},
  date = {2021-07-09},
}

@inproceedings{he2016deep,
  title = {Deep Residual Learning for Image Recognition},
  author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages = {770--778},
  doi = {10.1109/CVPR.2016.90},
  url = {http://arxiv.org/abs/1512.03385v1},
  date = {2015-12-10},
  eprint = {1512.03385},
  archiveprefix = {arXiv},
  primaryclass = {cs.CV},
}

@incollection{He2018,
  title = {{AMC}: {AutoML} for Model Compression and Acceleration on Mobile Devices},
  author = {He, Yihui and Lin, Ji and Liu, Zhijian and Wang, Hanrui and Li, Li-Jia and Han, Song},
  booktitle = {Computer Vision -- ECCV 2018},
  publisher = {Springer International Publishing},
  pages = {815--832},
  doi = {10.1007/978-3-030-01234-2_48},
  isbn = {9783030012335,9783030012342},
  issn = {0302-9743,1611-3349},
  source = {Crossref},
  date = {2018},
}
|
||
|
||
@inproceedings{hegde2023introduction,
|
||
title = {Towards a new interpretation of separable convolutions},
|
||
author = {Ghosh, Tapabrata},
|
||
booktitle = {2017 Intelligent Systems Conference (IntelliSys)},
|
||
publisher = {IEEE},
|
||
pages = {112--116},
|
||
doi = {10.1109/intellisys.2017.8324241},
|
||
url = {https://doi.org/10.1109/intellisys.2017.8324241},
|
||
urldate = {2023-10-20},
|
||
source = {Crossref},
|
||
date = {2017-09},
|
||
bdsk-url-1 = {https://www.analyticsvidhya.com/blog/2021/11/an-introduction-to-separable-convolutions/},
|
||
}
|
||
|
||
@article{hinton2015distilling,
|
||
title = {Distilling the Knowledge in a Neural Network},
|
||
author = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
|
||
booktitle = {NIPS Deep Learning and Representation Learning Workshop},
|
||
publisher = {Wiley},
|
||
doi = {10.1002/0471743984.vse0673},
|
||
isbn = {9780471332305,9780471743989},
|
||
url = {http://arxiv.org/abs/1503.02531v1},
|
||
date = {2015-03-09},
|
||
primaryclass = {stat.ML},
|
||
archiveprefix = {arXiv},
|
||
eprint = {1503.02531},
|
||
source = {Crossref},
|
||
}
|
||
|
||
@article{Hoefler2021,
|
||
title = {
|
||
Sparsity in Deep Learning: Pruning and growth for efficient inference and training in neural
|
||
networks
|
||
},
|
||
author = {Hoefler, Torsten and Alistarh, Dan and Ben-Nun, Tal and Dryden, Nikoli and Peste, Alexandra},
|
||
journal = {arXiv preprint arXiv:2102.00554},
|
||
volume = {22},
|
||
pages = {1--124},
|
||
url = {http://arxiv.org/abs/2102.00554v1},
|
||
date = {2021-01-31},
|
||
primaryclass = {cs.LG},
|
||
archiveprefix = {arXiv},
|
||
}
|
||
|
||
@article{hoefler2021sparsity,
|
||
title = {
|
||
Sparsity in Deep Learning: Pruning and growth for efficient inference and training in neural
|
||
networks
|
||
},
|
||
author = {
|
||
Hoefler, Torsten and Alistarh, Dan and Ben-Nun, Tal and Dryden, Nikoli and Ziogas, Alexandros
|
||
Nikolaos
|
||
},
|
||
year = {2021},
|
||
journal = {Journal of Machine Learning Research},
|
||
volume = {22},
|
||
number = {241},
|
||
pages = {1--124},
|
||
}
|
||
|
||
@article{howard2017mobilenets,
  title = {MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications},
  author = {
    Howard, Andrew G. and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and
    Weyand, Tobias and Andreetto, Marco and Adam, Hartwig
  },
  journal = {ArXiv preprint},
  booktitle = {arXiv preprint arXiv:1704.04861},
  volume = {abs/1704.04861},
  url = {http://arxiv.org/abs/1704.04861v1},
  date = {2017-04-17},
  primaryclass = {cs.CV},
  archiveprefix = {arXiv},
}

@article{hu2021lora,
  title = {LoRA: Low-Rank Adaptation of Large Language Models},
  author = {
    Hu, Edward J. and Shen, Yelong and Wallis, Phillip and Allen-Zhu, Zeyuan and Li, Yuanzhi and
    Wang, Shean and Wang, Lu and Chen, Weizhu
  },
  journal = {arXiv preprint arXiv:2106.09685},
  url = {http://arxiv.org/abs/2106.09685v2},
  date = {2021-06-17},
  primaryclass = {cs.CL},
  archiveprefix = {arXiv},
}

@article{hu2021triple,
  title = {
    Triple Wins: Boosting Accuracy, Robustness and Efficiency Together by Enabling Input-Adaptive
    Inference
  },
  author = {Hu, Bowen and Zhang, Zhiqiang and Fu, Yun},
  year = {2021},
  journal = {Advances in Neural Information Processing Systems},
  volume = {34},
  pages = {18537--18550},
}

@article{huang2023adaptive,
  title = {Adaptive Neural Networks for Real-Time Processing in Autonomous Systems},
  author = {Huang, Wei and Chen, Jie and Zhang, Lei},
  year = {2023},
  journal = {IEEE Transactions on Intelligent Transportation Systems},
  publisher = {IEEE},
}

@article{Hubara2018,
  title = {
    Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations
  },
  author = {
    Hubara, Itay and Courbariaux, Matthieu and Soudry, Daniel and El-Yaniv, Ran and Bengio, Yoshua
  },
  year = {2018},
  journal = {Journal of Machine Learning Research (JMLR)},
  volume = {18},
  pages = {1--30},
}

@book{Hutter2019,
  title = {Automated Machine Learning},
  author = {Hutter, Frank and Kotthoff, Lars and Vanschoren, Joaquin},
  publisher = {Springer International Publishing},
  doi = {10.1007/978-3-030-05318-5},
  isbn = {9783030053178,9783030053185},
  issn = {2520-131X,2520-1328},
  url = {https://doi.org/10.1007/978-3-030-05318-5},
  source = {Crossref},
  subtitle = {Methods, Systems, Challenges},
  date = {2019},
}

@article{iandola2016squeezenet,
  title = {SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size},
  author = {
    Iandola, Forrest N. and Han, Song and Moskewicz, Matthew W. and Ashraf, Khalid and Dally,
    William J. and Keutzer, Kurt
  },
  journal = {ArXiv preprint},
  volume = {abs/1602.07360},
  url = {http://arxiv.org/abs/1602.07360v4},
  date = {2016-02-24},
  primaryclass = {cs.CV},
  archiveprefix = {arXiv},
}

@misc{intellabs2023knowledge,
  title = {Neural Network Compression and Knowledge Distillation: Tutorial and Survey},
  author = {Ghojogh, Benyamin and Ghodsi, Ali},
  publisher = {Center for Open Science},
  doi = {10.31219/osf.io/4n2cb},
  url = {https://doi.org/10.31219/osf.io/4n2cb},
  urldate = {2023-10-20},
  source = {Crossref},
  date = {2024-10-15},
  bdsk-url-1 = {https://intellabs.github.io/distiller/knowledge\_distillation.html},
}

@inproceedings{isscc2014computings,
  title = {1.1 Computing's energy problem (and what we can do about it)},
  author = {Horowitz, Mark},
  booktitle = {2014 IEEE International Solid-State Circuits Conference Digest of Technical Papers (ISSCC)},
  publisher = {IEEE},
  doi = {10.1109/isscc.2014.6757323},
  url = {https://doi.org/10.1109/isscc.2014.6757323},
  urldate = {2014-03-06},
  source = {Crossref},
  date = {2014-02},
  bdsk-url-1 = {https://ieeexplore.ieee.org/document/6757323},
}

@inproceedings{Jacob2018,
  title = {Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference},
  author = {
    Jacob, Benoit and Kligys, Skirmantas and Chen, Bo and Zhu, Menglong and Tang, Matthew and
    Howard, Andrew and Adam, Hartwig and Kalenichenko, Dmitry
  },
  journal = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  booktitle = {2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  publisher = {IEEE},
  pages = {2704--2713},
  doi = {10.1109/cvpr.2018.00286},
  url = {https://doi.org/10.1109/cvpr.2018.00286},
  source = {Crossref},
  date = {2018-06},
}

@inproceedings{jacob2018quantization,
  title = {Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference},
  author = {
    Jacob, Benoit and Kligys, Skirmantas and Chen, Bo and Zhu, Menglong and Tang, Matthew and
    Howard, Andrew and Adam, Hartwig and Kalenichenko, Dmitry
  },
  journal = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  booktitle = {2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  publisher = {IEEE},
  pages = {2704--2713},
  doi = {10.1109/cvpr.2018.00286},
  url = {https://doi.org/10.1109/cvpr.2018.00286},
  source = {Crossref},
  date = {2018-06},
}

@article{jia2016dynamic,
  title = {Dynamic Filter Networks},
  author = {Jia, Xu and De Brabandere, Bert and Tuytelaars, Tinne and Gool, Luc Van},
  year = {2016},
  journal = {Advances in Neural Information Processing Systems},
  volume = {29},
}

@article{jiang2019accuracy,
  title = {
    Accuracy vs. Efficiency: Achieving Both through FPGA-Implementation Aware Neural Architecture
    Search
  },
  author = {
    Jiang, Weiwen and Zhang, Xinyi and Sha, Edwin H. -M. and Yang, Lei and Zhuge, Qingfeng and Shi,
    Yiyu and Hu, Jingtong
  },
  publisher = {Wiley},
  pages = {351--375},
  doi = {10.1002/9783527829026.ch13},
  isbn = {9783527348091,9783527829026},
  url = {http://arxiv.org/abs/1901.11211v1},
  date = {2019-01-31},
  primaryclass = {cs.DC},
  archiveprefix = {arXiv},
  eprint = {1901.11211},
  source = {Crossref},
}

@inproceedings{jiao2020tinybert,
  title = {TinyBERT: Distilling BERT for Natural Language Understanding},
  author = {
    Jiao, Xiaoqi and Yin, Yichun and Shang, Lifeng and Jiang, Xin and Chen, Xiao and Li, Linlin and
    Wang, Fang and Liu, Qun
  },
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2020},
  publisher = {Association for Computational Linguistics},
  doi = {10.18653/v1/2020.findings-emnlp.372},
  url = {https://doi.org/10.18653/v1/2020.findings-emnlp.372},
  source = {Crossref},
  date = {2020},
}

@article{jonathan2019lottery,
  title = {The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks},
  author = {Frankle, Jonathan and Carbin, Michael},
  journal = {arXiv preprint arXiv:1803.03635},
  booktitle = {
    7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May
    6-9, 2019
  },
  publisher = {OpenReview.net},
  url = {http://arxiv.org/abs/1803.03635v5},
  date = {2018-03-09},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
  source = {DBLP},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/conf/iclr/FrankleC19.bib},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
}

@inproceedings{Joulin2017,
  title = {Bag of Tricks for Efficient Text Classification},
  author = {Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Mikolov, Tomas},
  journal = {Journal of Machine Learning Research},
  booktitle = {
    Proceedings of the 15th Conference of the European Chapter of the Association for Computational
    Linguistics: Volume 2, Short Papers
  },
  publisher = {Association for Computational Linguistics},
  volume = {18},
  pages = {1--42},
  doi = {10.18653/v1/e17-2068},
  url = {https://doi.org/10.18653/v1/e17-2068},
  source = {Crossref},
  date = {2017},
}

@inproceedings{jouppi2017datacenter,
  title = {In-Datacenter Performance Analysis of a Tensor Processing Unit},
  author = {
    Jouppi, Norman P. and Young, Cliff and Patil, Nishant and Patterson, David and Agrawal, Gaurav
    and Bajwa, Raminder and Bates, Sarah and Bhatia, Suresh and Boden, Nan and Borchers, Al and
    Boyle, Rick and Cantin, Pierre-luc and Chao, Clifford and Clark, Chris and Coriell, Jeremy and
    Daley, Mike and Dau, Matt and Dean, Jeffrey and Gelb, Ben and Ghaemmaghami, Tara Vazir and
    Gottipati, Rajendra and Gulland, William and Hagmann, Robert and Ho, C. Richard and Hogberg,
    Doug and Hu, John and Hundt, Robert and Hurt, Dan and Ibarz, Julian and Jaffey, Aaron and
    Jaworski, Alek and Kaplan, Alexander and Khaitan, Harshit and Killebrew, Daniel and Koch, Andy
    and Kumar, Naveen and Lacy, Steve and Laudon, James and Law, James and Le, Diemthu and Leary,
    Chris and Liu, Zhuyuan and Lucke, Kyle and Lundin, Alan and MacKean, Gordon and Maggiore,
    Adriana and Mahony, Maire and Miller, Kieran and Nagarajan, Rahul and Narayanaswami, Ravi and
    Ni, Ray and Nix, Kathy and Norrie, Thomas and Omernick, Mark and Penukonda, Narayana and
    Phelps, Andy and Ross, Jonathan and Ross, Matt and Salek, Amir and Samadiani, Emad and Severn,
    Chris and Sizikov, Gregory and Snelham, Matthew and Souter, Jed and Steinberg, Dan and Swing,
    Andy and Tan, Mercedes and Thorson, Gregory and Tian, Bo and Toma, Horia and Tuttle, Erick and
    Vasudevan, Vijay and Walter, Richard and Wang, Walter and Wilcox, Eric and Yoon, Doe Hyun
  },
  journal = {Proceedings of the 44th Annual International Symposium on Computer Architecture (ISCA)},
  booktitle = {Proceedings of the 44th Annual International Symposium on Computer Architecture},
  publisher = {ACM},
  pages = {1--12},
  doi = {10.1145/3079856.3080246},
  url = {https://doi.org/10.1145/3079856.3080246},
  source = {Crossref},
  date = {2017-06-24},
}

@inproceedings{Jouppi2021,
  title = {Ten Lessons From Three Generations Shaped Google's TPUv4i : Industrial Product},
  author = {
    Jouppi, Norman P. and Hyun Yoon, Doe and Ashcraft, Matthew and Gottscho, Mark and Jablin,
    Thomas B. and Kurian, George and Laudon, James and Li, Sheng and Ma, Peter and Ma, Xiaoyu and
    Norrie, Thomas and Patil, Nishant and Prasad, Sushma and Young, Cliff and Zhou, Zongwei and
    Patterson, David
  },
  journal = {Proceedings of the IEEE Hot Chips Symposium},
  booktitle = {2021 ACM/IEEE 48th Annual International Symposium on Computer Architecture (ISCA)},
  publisher = {IEEE},
  pages = {1--14},
  doi = {10.1109/isca52012.2021.00010},
  url = {https://doi.org/10.1109/isca52012.2021.00010},
  source = {Crossref},
  date = {2021-06},
}

@article{koren2009matrix,
  title = {Matrix Factorization Techniques for Recommender Systems},
  author = {Koren, Yehuda and Bell, Robert and Volinsky, Chris},
  journal = {Computer},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  volume = {42},
  number = {8},
  pages = {30--37},
  doi = {10.1109/mc.2009.263},
  issn = {0018-9162},
  url = {https://doi.org/10.1109/mc.2009.263},
  source = {Crossref},
  date = {2009-08},
}

@article{krishna2023raman,
  title = {RAMAN: A Re-configurable and Sparse tinyML Accelerator for Inference on Edge},
  author = {
    Krishna, Adithya and Nudurupati, Srikanth Rohit and G, Chandana D and Dwivedi, Pritesh and van
    Schaik, Andr\'{e} and Mehendale, Mahesh and Thakur, Chetan Singh
  },
  url = {http://arxiv.org/abs/2306.06493v1},
  date = {2023-06-10},
  primaryclass = {cs.NE},
  archiveprefix = {arXiv},
  eprint = {2306.06493},
}

@article{krishnamoorthi2018quantizing,
  title = {Quantizing deep convolutional networks for efficient inference: A whitepaper},
  author = {Krishnamoorthi, Raghuraman},
  journal = {arXiv preprint arXiv:1806.08342},
  volume = {abs/1806.08342},
  url = {http://arxiv.org/abs/1806.08342v1},
  date = {2018-06-21},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
}

@article{kullback1951information,
  title = {On Information and Sufficiency},
  author = {Kullback, S. and Leibler, R. A.},
  journal = {The Annals of Mathematical Statistics},
  publisher = {Institute of Mathematical Statistics},
  volume = {22},
  number = {1},
  pages = {79--86},
  doi = {10.1214/aoms/1177729694},
  issn = {0003-4851},
  url = {https://doi.org/10.1214/aoms/1177729694},
  source = {Crossref},
  date = {1951-03},
}

@article{kuzmin2022fp8,
  title = {FP8 Quantization: The Power of the Exponent},
  author = {
    Kuzmin, Andrey and Baalen, Mart Van and Ren, Yuwei and Nagel, Markus and Peters, Jorn and
    Blankevoort, Tijmen
  },
  url = {http://arxiv.org/abs/2208.09225v2},
  date = {2022-08-19},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
  eprint = {2208.09225},
}

@article{kwon2021hardwaresoftware,
  title = {
    Hardware/Software Co-Design for TinyML Voice-Recognition Application on Resource Frugal Edge
    Devices
  },
  author = {Kwon, Jisu and Park, Daejin},
  journal = {Applied Sciences},
  publisher = {MDPI AG},
  volume = {11},
  number = {22},
  pages = {11073},
  doi = {10.3390/app112211073},
  issn = {2076-3417},
  url = {https://doi.org/10.3390/app112211073},
  source = {Crossref},
  date = {2021-11-22},
  article-number = {11073},
  bdsk-url-1 = {https://www.mdpi.com/2076-3417/11/22/11073},
  bdsk-url-2 = {https://doi.org/10.3390/app112211073},
}

@article{lai2018cmsisnn,
  title = {CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs},
  author = {Lai, Liangzhen and Suda, Naveen and Chandra, Vikas},
  url = {http://arxiv.org/abs/1801.06601v1},
  date = {2018-01-19},
  primaryclass = {cs.NE},
  archiveprefix = {arXiv},
  eprint = {1801.06601},
}

@inproceedings{lecun1990optimal,
  title = {Optimal Brain Damage.},
  author = {LeCun, Yann and Denker, John S. and Solla, Sara A.},
  year = {1989},
  journal = {NIPS},
  booktitle = {Advances in Neural Information Processing Systems 2 (NIPS 1989)},
  pages = {598--605},
  url = {http://papers.nips.cc/paper/250-optimal-brain-damage},
  source = {DBLP},
}

@inproceedings{lepikhin2020gshard,
  title = {GShard: Scaling Giant Models with Conditional Computation},
  author = {Lepikhin, Dmitry and others},
  year = {2020},
  booktitle = {Proceedings of the International Conference on Learning Representations},
}

@article{Li2016,
  title = {Ternary Weight Networks},
  author = {Li, Fengfu and Liu, Bin and Wang, Xiaoxing and Zhang, Bo and Yan, Junchi},
  journal = {arXiv preprint},
  url = {http://arxiv.org/abs/1605.04711v3},
  date = {2016-05-16},
  primaryclass = {cs.CV},
  archiveprefix = {arXiv},
  eprint = {1605.04711},
}

@article{Li2021,
  title = {Hyperband: A Novel Bandit-Based Approach to Hyperparameter Optimization.},
  author = {
    Li, Lisha and Jamieson, Kevin G. and DeSalvo, Giulia and Rostamizadeh, Afshin and Talwalkar,
    Ameet
  },
  year = {2017},
  journal = {J. Mach. Learn. Res.},
  booktitle = {Journal of Machine Learning Research},
  volume = {18},
  pages = {185:1--185:52},
  url = {https://jmlr.org/papers/v18/16-558.html},
  source = {DBLP},
}

@inproceedings{lin2020mcunet,
  title = {MCUNet: Tiny Deep Learning on IoT Devices},
  author = {Lin, Ji and Chen, Wei-Ming and Lin, Yujun and Cohn, John and Gan, Chuang and Han, Song},
  year = {2020},
  booktitle = {
    Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information
    Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual
  },
  url = {https://proceedings.neurips.cc/paper/2020/hash/86c51678350f656dcc7f490a43946ee5-Abstract.html},
  editor = {
    Larochelle, Hugo and Ranzato, Marc'Aurelio and Hadsell, Raia and Balcan, Maria-Florina and Lin,
    Hsuan-Tien
  },
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/conf/nips/LinCLCG020.bib},
  timestamp = {Thu, 11 Feb 2021 00:00:00 +0100},
}

@article{lin2023awq,
  title = {AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration},
  author = {
    Lin, Ji and Tang, Jiaming and Tang, Haotian and Yang, Shang and Chen, Wei-Ming and Wang,
    Wei-Chen and Xiao, Guangxuan and Dang, Xingyu and Gan, Chuang and Han, Song
  },
  journal = {arXiv preprint arXiv:2306.00978},
  volume = {abs/2306.00978},
  url = {http://arxiv.org/abs/2306.00978v5},
  date = {2023-06-01},
  primaryclass = {cs.CL},
  archiveprefix = {arXiv},
}

@article{lu2023steplearningnmstructured,
  title = {STEP: Learning N:M Structured Sparsity Masks from Scratch with Precondition},
  author = {
    Lu, Yucheng and Agrawal, Shivani and Subramanian, Suvinay and Rybakov, Oleg and Sa, Christopher
    De and Yazdanbakhsh, Amir
  },
  url = {http://arxiv.org/abs/2302.01172v1},
  date = {2023-02-02},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
  eprint = {2302.01172},
}

@article{lubana2020gradient,
  title = {A Gradient Flow Framework For Analyzing Network Pruning},
  author = {Lubana, Ekdeep Singh and Dick, Robert P.},
  journal = {arXiv preprint arXiv:2009.11839},
  url = {http://arxiv.org/abs/2009.11839v4},
  date = {2020-09-24},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
}

@article{mellempudi2019mixed,
  title = {Mixed Precision Training With 8-bit Floating Point},
  author = {Mellempudi, Naveen and Srinivasan, Sudarshan and Das, Dipankar and Kaul, Bharat},
  journal = {arXiv preprint arXiv:1905.12334},
  url = {http://arxiv.org/abs/1905.12334v1},
  date = {2019-05-29},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
}

@article{micikevicius2018mixed,
  title = {Mixed Precision Training},
  author = {
    Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich
    and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh,
    Ganesh and Wu, Hao
  },
  journal = {arXiv preprint arXiv:1710.03740},
  url = {http://arxiv.org/abs/1710.03740v3},
  date = {2017-10-10},
  primaryclass = {cs.AI},
  archiveprefix = {arXiv},
}

@article{micikevicius2022fp8,
  title = {FP8 Formats for Deep Learning},
  author = {
    Micikevicius, Paulius and Stosic, Dusan and Burgess, Neil and Cornea, Marius and Dubey, Pradeep
    and Grisenthwaite, Richard and Ha, Sangwon and Heinecke, Alexander and Judd, Patrick and
    Kamalu, John and Mellempudi, Naveen and Oberman, Stuart and Shoeybi, Mohammad and Siu, Michael
    and Wu, Hao
  },
  journal = {arXiv preprint arXiv:2209.05433},
  url = {http://arxiv.org/abs/2209.05433v2},
  date = {2022-09-12},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
}

@article{nagel2021white,
  title = {A White Paper on Neural Network Quantization},
  author = {
    Nagel, Markus and Fournarakis, Marios and Amjad, Rana Ali and Bondarenko, Yelysei and van
    Baalen, Mart and Blankevoort, Tijmen
  },
  journal = {arXiv preprint arXiv:2106.08295},
  url = {http://arxiv.org/abs/2106.08295v1},
  date = {2021-06-15},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
}

@article{nagel2021whitepaper,
  title = {A White Paper on Neural Network Quantization},
  author = {
    Nagel, Markus and Fournarakis, Marios and Amjad, Rana Ali and Bondarenko, Yelysei and van
    Baalen, Mart and Blankevoort, Tijmen
  },
  journal = {arXiv preprint arXiv:2106.08295},
  url = {http://arxiv.org/abs/2106.08295v1},
  date = {2021-06-15},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
}

@inproceedings{NVIDIA2020,
  title = {
    Demystifying the Nvidia Ampere Architecture through Microbenchmarking and Instruction-level
    Analysis
  },
  author = {Abdelkhalik, Hamdy and Arafa, Yehia and Santhi, Nandakishore and Badawy, Abdel-Hameed A.},
  booktitle = {2022 IEEE High Performance Extreme Computing Conference (HPEC)},
  publisher = {IEEE},
  doi = {10.1109/hpec55821.2022.9926299},
  url = {https://doi.org/10.1109/hpec55821.2022.9926299},
  source = {Crossref},
  date = {2022-09-19},
}

@article{patterson2021carbon,
  title = {Carbon Emissions and Large Neural Network Training},
  author = {
    Patterson, David and Gonzalez, Joseph and Le, Quoc and Liang, Chen and Munguia, Lluis-Miquel
    and Rothchild, Daniel and So, David and Texier, Maud and Dean, Jeff
  },
  journal = {arXiv preprint arXiv:2104.10350},
  url = {http://arxiv.org/abs/2104.10350v3},
  date = {2021-04-21},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
}

@inproceedings{prakash2022cfu,
  title = {
    CFU Playground: Full-Stack Open-Source Framework for Tiny Machine Learning (TinyML)
    Acceleration on FPGAs
  },
  author = {
    Prakash, Shvetank and Callahan, Tim and Bushagour, Joseph and Banbury, Colby and Green, Alan V.
    and Warden, Pete and Ansell, Tim and Reddi, Vijay Janapa
  },
  journal = {ArXiv preprint},
  booktitle = {2023 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  publisher = {IEEE},
  volume = {abs/2201.01863},
  pages = {157--167},
  doi = {10.1109/ispass57527.2023.00024},
  url = {https://doi.org/10.1109/ispass57527.2023.00024},
  source = {Crossref},
  date = {2023-04},
}

@article{qi2021efficient,
  title = {An efficient pruning scheme of deep neural networks for Internet of Things applications},
  author = {
    Qi, Chen and Shen, Shibo and Li, Rongpeng and Zhao, Zhifeng and Liu, Qing and Liang, Jing and
    Zhang, Honggang
  },
  journal = {EURASIP Journal on Advances in Signal Processing},
  publisher = {Springer Science and Business Media LLC},
  volume = {2021},
  number = {1},
  pages = {31},
  doi = {10.1186/s13634-021-00744-4},
  issn = {1687-6180},
  url = {https://doi.org/10.1186/s13634-021-00744-4},
  source = {Crossref},
  date = {2021-06-29},
}

@inproceedings{rachwan2022winning,
  title = {Winning the lottery ahead of time: Efficient early network pruning},
  author = {
    Rachwan, John and Z\"{u}gner, Daniel and Charpentier, Bertrand and Geisler, Simon and Ayle,
    Morgane and G\"{u}nnemann, Stephan
  },
  year = {2022},
  booktitle = {International Conference on Machine Learning},
  pages = {18293--18309},
  organization = {PMLR},
}

@inproceedings{radosavovic2020designing,
  title = {Designing Network Design Spaces},
  author = {
    Radosavovic, Ilija and Kosaraju, Raj Prateek and Girshick, Ross and He, Kaiming and Dollar,
    Piotr
  },
  journal = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  booktitle = {2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  publisher = {IEEE},
  pages = {10428--10436},
  doi = {10.1109/cvpr42600.2020.01044},
  url = {https://doi.org/10.1109/cvpr42600.2020.01044},
  source = {Crossref},
  date = {2020-06},
}

@incollection{Rastegari2016,
  title = {XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks},
  author = {Rastegari, Mohammad and Ordonez, Vicente and Redmon, Joseph and Farhadi, Ali},
  journal = {European Conference on Computer Vision (ECCV)},
  booktitle = {Computer Vision -- ECCV 2016},
  publisher = {Springer International Publishing},
  pages = {525--542},
  doi = {10.1007/978-3-319-46493-0\_32},
  isbn = {9783319464923,9783319464930},
  issn = {0302-9743,1611-3349},
  url = {https://doi.org/10.1007/978-3-319-46493-0\_32},
  source = {Crossref},
  date = {2016},
}

@article{Real2019,
  title = {Regularized Evolution for Image Classifier Architecture Search},
  author = {Real, Esteban and Aggarwal, Alok and Huang, Yanping and Le, Quoc V.},
  journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
  publisher = {Association for the Advancement of Artificial Intelligence (AAAI)},
  volume = {33},
  number = {01},
  pages = {4780--4789},
  doi = {10.1609/aaai.v33i01.33014780},
  issn = {2374-3468,2159-5399},
  url = {https://doi.org/10.1609/aaai.v33i01.33014780},
  source = {Crossref},
  date = {2019-07-17},
}

@article{real2019regularized,
  title = {Regularized Evolution for Image Classifier Architecture Search},
  author = {Real, Esteban and Aggarwal, Alok and Huang, Yanping and Le, Quoc V.},
  journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
  booktitle = {AAAI Conference on Artificial Intelligence},
  publisher = {Association for the Advancement of Artificial Intelligence (AAAI)},
  volume = {33},
  number = {01},
  pages = {4780--4789},
  doi = {10.1609/aaai.v33i01.33014780},
  issn = {2374-3468,2159-5399},
  url = {https://doi.org/10.1609/aaai.v33i01.33014780},
  source = {Crossref},
  date = {2019-07-17},
}

@inproceedings{sabour2017dynamic,
  title = {Dynamic Routing Between Capsules},
  author = {Sabour, Sara and Frosst, Nicholas and Hinton, Geoffrey E},
  year = {2017},
  booktitle = {Advances in Neural Information Processing Systems},
  volume = {30},
}

@article{sanh2019distilbert,
  title = {DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter},
  author = {Sanh, Victor and Debut, Lysandre and Chaumond, Julien and Wolf, Thomas},
  journal = {arXiv preprint arXiv:1910.01108},
  url = {http://arxiv.org/abs/1910.01108v4},
  date = {2019-10-02},
  primaryclass = {cs.CL},
  archiveprefix = {arXiv},
}

@article{scardapane2020should,
  title = {Why should I trust you? A survey of explainability of machine learning for healthcare},
  author = {Scardapane, Simone and Wang, Ye and Panella, Massimo},
  year = {2020},
  journal = {Pattern Recognition Letters},
  publisher = {Elsevier},
  volume = {140},
  pages = {47--57},
}

@inproceedings{shazeer2017outrageously,
  title = {Outrageously large neural networks: The sparsely-gated mixture-of-experts layer},
  author = {Shazeer, Noam and Mirhoseini, Azalia and Maziarz, Piotr and others},
  year = {2017},
  booktitle = {International Conference on Learning Representations},
}

@article{sheng2019qbert,
  title = {Q-BERT: Hessian Based Ultra Low Precision Quantization of BERT},
  author = {
    Shen, Sheng and Dong, Zhen and Ye, Jiayu and Ma, Linjian and Yao, Zhewei and Gholami, Amir and
    Mahoney, Michael W. and Keutzer, Kurt
  },
  journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  publisher = {Association for the Advancement of Artificial Intelligence (AAAI)},
  volume = {34},
  number = {05},
  pages = {8815--8821},
  doi = {10.1609/aaai.v34i05.6409},
  issn = {2374-3468,2159-5399},
  url = {http://arxiv.org/abs/1909.05840v2},
  date = {2019-09-12},
  primaryclass = {cs.CL},
  archiveprefix = {arXiv},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/journals/corr/abs-1909-05840.bib},
  eprint = {1909.05840},
  eprinttype = {arXiv},
  timestamp = {Wed, 18 Sep 2019 10:38:36 +0200},
  source = {Crossref},
}

@inproceedings{sun2019patient,
  title = {Patient Knowledge Distillation for BERT Model Compression},
  author = {Sun, Siqi and Cheng, Yu and Gan, Zhe and Liu, Jingjing},
  booktitle = {
    Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the
    9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
  },
  publisher = {Association for Computational Linguistics},
  doi = {10.18653/v1/d19-1441},
  url = {https://doi.org/10.18653/v1/d19-1441},
  source = {Crossref},
  date = {2019},
}

@inproceedings{Tan2019,
  title = {EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks},
  author = {Tan, Mingxing and Le, Quoc V.},
  year = {2019},
  booktitle = {International Conference on Machine Learning},
}

@inproceedings{tan2019efficientnet,
  title = {EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks},
  author = {Tan, Mingxing and Le, Quoc V},
  year = {2019},
  booktitle = {International Conference on Machine Learning (ICML)},
  pages = {6105--6114},
}

@inproceedings{tan2019mnasnet,
  title = {MnasNet: Platform-Aware Neural Architecture Search for Mobile},
  author = {
    Tan, Mingxing and Chen, Bo and Pang, Ruoming and Vasudevan, Vijay and Sandler, Mark and Howard,
    Andrew and Le, Quoc V.
  },
  booktitle = {2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  publisher = {IEEE},
  pages = {2815--2823},
  doi = {10.1109/cvpr.2019.00293},
  url = {https://doi.org/10.1109/cvpr.2019.00293},
  source = {Crossref},
  date = {2019-06},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/conf/cvpr/TanCPVSHL19.bib},
  timestamp = {Tue, 12 Jan 2021 00:00:00 +0100},
}

@article{tan2020efficientnet,
  title = {EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks},
  author = {Tan, Mingxing and Le, Quoc V.},
  publisher = {Wiley},
  pages = {111--131},
  doi = {10.1002/9781394205639.ch6},
  isbn = {9781394205608,9781394205639},
  url = {http://arxiv.org/abs/1905.11946v5},
  date = {2019-05-28},
  primaryclass = {cs.LG},
  archiveprefix = {arXiv},
  eprint = {1905.11946},
  source = {Crossref},
}

@article{tang2020understanding,
  title = {Primordial black holes and secondary gravitational waves from k/G inflation},
  author = {Lin, Jiong and Gao, Qing and Gong, Yungui and Lu, Yizhou and Zhang, Chao and Zhang, Fengge},
  journal = {arXiv preprint arXiv:2001.05909},
  url = {http://arxiv.org/abs/2001.05909v2},
  date = {2020-01-16},
  primaryclass = {gr-qc},
  archiveprefix = {arXiv},
  internal-note = {
    NOTE(review): entry data (a gr-qc cosmology paper) does not match the citation key
    'tang2020understanding' -- verify against the chapter text which work was intended
  },
}

@article{teerapittayanon2016branchynet,
  title = {BranchyNet: Fast Inference via Early Exiting from Deep Neural Networks},
  author = {Teerapittayanon, Surat and McDanel, Bradley and Kung, H. T.},
  journal = {arXiv preprint arXiv:1709.01686},
  booktitle = {2016 23rd International Conference on Pattern Recognition (ICPR)},
  publisher = {IEEE},
  pages = {2464--2469},
  doi = {10.1109/icpr.2016.7900006},
  url = {http://arxiv.org/abs/1709.01686v1},
  date = {2017-09-06},
  primaryclass = {cs.NE},
  archiveprefix = {arXiv},
  source = {Crossref},
}

@inproceedings{Umuroglu2017,
  title = {{FINN}: A Framework for Fast, Scalable Binarized Neural Network Inference},
  author = {Umuroglu, Yaman and Fraser, Nicholas J. and Gambardella, Giulio and Blott, Michaela and Leong, Philip and Jahre, Magnus and Vissers, Kees},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  publisher = {ACM},
  pages = {65--74},
  doi = {10.1145/3020078.3021744},
  url = {https://doi.org/10.1145/3020078.3021744},
  source = {Crossref},
  date = {2017-02-22},
}

@article{Vanschoren2019,
  title = {Meta-Learning: A Survey},
  author = {Vanschoren, Joaquin},
  journal = {arXiv preprint arXiv:1810.03548},
  url = {http://arxiv.org/abs/1810.03548v1},
  date = {2018-10-08},
  eprint = {1810.03548},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}

@inproceedings{vaswani2017attention,
  title = {Attention Is All You Need},
  author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Lukasz and Polosukhin, Illia},
  booktitle = {Advances in Neural Information Processing Systems},
  volume = {30},
  pages = {5998--6008},
  date = {2017},
  internal-note = {NOTE(review): restored Vaswani et al. (NeurIPS 2017); the entry had been overwritten with Chen et al., ACL 2018 metadata while volume 30 and pages 5998--6008 still matched the Attention paper},
}

@incollection{wang2018skipnet,
  title = {{SkipNet}: Learning Dynamic Routing in Convolutional Networks},
  author = {Wang, Xin and Yu, Fisher and Dou, Zi-Yi and Darrell, Trevor and Gonzalez, Joseph E.},
  booktitle = {Computer Vision -- ECCV 2018},
  publisher = {Springer International Publishing},
  pages = {420--436},
  doi = {10.1007/978-3-030-01261-8_25},
  isbn = {9783030012601,9783030012618},
  issn = {0302-9743,1611-3349},
  url = {https://doi.org/10.1007/978-3-030-01261-8_25},
  source = {Crossref},
  date = {2018},
}

@article{wang2019benchmarking,
  title = {Benchmarking {TPU}, {GPU}, and {CPU} Platforms for Deep Learning},
  author = {Wang, Yu Emma and Wei, Gu-Yeon and Brooks, David},
  journal = {arXiv preprint arXiv:1907.10701},
  url = {http://arxiv.org/abs/1907.10701v4},
  date = {2019-07-24},
  eprint = {1907.10701},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}

@article{wang2021glam,
  title = {{EtherSolve}: Computing an Accurate Control-Flow Graph from {Ethereum} Bytecode},
  author = {Contro, Filippo and Crosara, Marco and Ceccato, Mariano and Preda, Mila Dalla},
  journal = {arXiv preprint arXiv:2103.09113},
  url = {http://arxiv.org/abs/2103.09113v1},
  date = {2021-03-16},
  eprint = {2103.09113},
  archiveprefix = {arXiv},
  primaryclass = {cs.SE},
  internal-note = {NOTE(review): citation key suggests the GLaM language model paper, but the metadata describes EtherSolve (Contro et al.); verify against the citing text before use},
}

@inproceedings{Wu2016,
  title = {Quantized Convolutional Neural Networks for Mobile Devices},
  author = {Wu, Jiaxiang and Leng, Cong and Wang, Yuhang and Hu, Qinghao and Cheng, Jian},
  booktitle = {2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  publisher = {IEEE},
  pages = {4820--4828},
  doi = {10.1109/cvpr.2016.521},
  url = {https://doi.org/10.1109/cvpr.2016.521},
  source = {Crossref},
  date = {2016-06},
}

@inproceedings{wu2019fast,
  title = {Fast Neural Networks: Efficient and Adaptive Computation for Inference},
  author = {Wu, Jian and Cheng, Hao and Zhang, Yifan},
  year = {2019},
  booktitle = {Advances in Neural Information Processing Systems},
}

@inproceedings{wu2019fbnet,
  title = {{FBNet}: Hardware-Aware Efficient {ConvNet} Design via Differentiable Neural Architecture Search},
  author = {Wu, Bichen and Keutzer, Kurt and Dai, Xiaoliang and Zhang, Peizhao and Wang, Yanghan and Sun, Fei and Wu, Yiming and Tian, Yuandong and Vajda, Peter and Jia, Yangqing},
  booktitle = {2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  publisher = {IEEE},
  pages = {10726--10734},
  doi = {10.1109/cvpr.2019.01099},
  url = {https://doi.org/10.1109/cvpr.2019.01099},
  source = {Crossref},
  date = {2019-06},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/conf/cvpr/WuDZWSWTVJK19.bib},
  timestamp = {Mon, 20 Jan 2020 00:00:00 +0100},
}

@article{wu2020integer,
  title = {Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation},
  author = {Wu, Hao and Judd, Patrick and Zhang, Xiaojie and Isaev, Mikhail and Micikevicius, Paulius},
  journal = {arXiv preprint arXiv:2004.09602},
  url = {http://arxiv.org/abs/2004.09602v1},
  date = {2020-04-20},
  eprint = {2004.09602},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
}

@article{xiao2022smoothquant,
  title = {{SmoothQuant}: Accurate and Efficient Post-Training Quantization for Large Language Models},
  author = {Xiao, Guangxuan and Lin, Ji and Seznec, Mickael and Wu, Hao and Demouth, Julien and Han, Song},
  journal = {arXiv preprint arXiv:2211.10438},
  url = {http://arxiv.org/abs/2211.10438v7},
  date = {2022-11-18},
  eprint = {2211.10438},
  archiveprefix = {arXiv},
  primaryclass = {cs.CL},
}

@inproceedings{xin-etal-2021-berxit,
  title = {{BERxiT}: Early Exiting for {BERT} with Better Fine-Tuning and Extension to Regression},
  author = {Xin, Ji and Tang, Raphael and Yu, Yaoliang and Lin, Jimmy},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  publisher = {Association for Computational Linguistics},
  address = {Online},
  pages = {91--104},
  doi = {10.18653/v1/2021.eacl-main.8},
  url = {https://doi.org/10.18653/v1/2021.eacl-main.8},
  source = {Crossref},
  date = {2021},
  editor = {Merlo, Paola and Tiedemann, J{\"o}rg and Tsarfaty, Reut},
  abstract = {
    The slow speed of BERT has motivated much research on accelerating its inference, and the early
    exiting idea has been proposed to make trade-offs between model quality and efficiency. This
    paper aims to address two weaknesses of previous work: (1) existing fine-tuning strategies for
    early exiting models fail to take full advantage of BERT; (2) methods to make exiting decisions
    are limited to classification tasks. We propose a more advanced fine-tuning strategy and a
    learning-to-exit module that extends early exiting to tasks other than classification.
    Experiments demonstrate improved early exiting for BERT, with better trade-offs obtained by the
    proposed fine-tuning strategy, successful application to regression tasks, and the possibility
    to combine it with other acceleration methods. Source code can be found at
    \url{https://github.com/castorini/berxit}.
  },
}

@article{xinyu,
  title = {The molecular biology of {FMRP}: new insights into fragile {X} syndrome},
  author = {Richter, Joel D. and Zhao, Xinyu},
  journal = {Nature Reviews Neuroscience},
  publisher = {Springer Science and Business Media LLC},
  volume = {22},
  number = {4},
  pages = {209--222},
  doi = {10.1038/s41583-021-00432-0},
  issn = {1471-003X,1471-0048},
  url = {https://doi.org/10.1038/s41583-021-00432-0},
  source = {Crossref},
  date = {2021-02-19},
  internal-note = {NOTE(review): removed an abstract about tensor decomposition and a medium.com bdsk-url that belonged to an unrelated work; this entry is a neuroscience review and looks out of place in an ML bibliography -- verify the citing text},
}

@inproceedings{xu2018alternating,
  title = {Alternating Multi-bit Quantization for Recurrent Neural Networks},
  author = {Xu, Chen and Yao, Jianqiang and Lin, Zhouchen and Ou, Wenwu and Cao, Yuanbin and Wang, Zhirong and Zha, Hongbin},
  year = {2018},
  booktitle = {6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
  publisher = {OpenReview.net},
  url = {https://openreview.net/forum?id=S19dR9x0b},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/conf/iclr/XuYLOCWZ18.bib},
  timestamp = {Thu, 25 Jul 2019 01:00:00 +0200},
}

@article{yang2020coexploration,
  title = {Co-Exploration of Neural Architectures and Heterogeneous {ASIC} Accelerator Designs Targeting Multiple Tasks},
  author = {Yang, Lei and Yan, Zheyu and Li, Meng and Kwon, Hyoukjun and Lai, Liangzhen and Krishna, Tushar and Chandra, Vikas and Jiang, Weiwen and Shi, Yiyu},
  journal = {arXiv preprint arXiv:2002.04116},
  url = {http://arxiv.org/abs/2002.04116v1},
  date = {2020-02-10},
  eprint = {2002.04116},
  archiveprefix = {arXiv},
  primaryclass = {cs.LG},
  internal-note = {NOTE(review): removed Wiley chapter DOI/ISBN/pages (10.1002/9783527667703.ch67) that did not match this arXiv preprint},
}

@inproceedings{yang2020resolution,
  title = {Resolution Adaptive Networks for Efficient Inference},
  author = {Yang, Le and Han, Yizeng and Chen, Xi and Song, Shiji and Dai, Jifeng and Huang, Gao},
  booktitle = {2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  publisher = {IEEE},
  pages = {2366--2375},
  doi = {10.1109/cvpr42600.2020.00244},
  url = {https://doi.org/10.1109/cvpr42600.2020.00244},
  source = {Crossref},
  date = {2020-06},
}

@inproceedings{yao2021hawq,
  title = {{HAWQ-V3}: Dyadic Neural Network Quantization},
  author = {Yao, Zhewei and Gholami, Amir and Shen, Sheng and Keutzer, Kurt and Mahoney, Michael W.},
  year = {2021},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning (ICML)},
  pages = {11875--11886},
  organization = {PMLR},
}

@article{yu2023efficient,
  title = {Efficient Early Exiting Strategies for Neural Network Acceleration},
  author = {Yu, Jun and Li, Peng and Wang, Zhenhua},
  year = {2023},
  journal = {IEEE Transactions on Neural Networks and Learning Systems},
  publisher = {IEEE},
}

@article{zhang2019autoshrink,
  title = {{AutoShrink}: A Topology-Aware {NAS} for Discovering Efficient Neural Architecture},
  author = {Zhang, Tunhou and Cheng, Hsin-Pai and Li, Zhenwen and Yan, Feng and Huang, Chengyu and Li, Hai and Chen, Yiran},
  journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
  publisher = {Association for the Advancement of Artificial Intelligence (AAAI)},
  volume = {34},
  number = {04},
  pages = {6829--6836},
  doi = {10.1609/aaai.v34i04.6163},
  issn = {2374-3468,2159-5399},
  url = {https://doi.org/10.1609/aaai.v34i04.6163},
  source = {Crossref},
  date = {2020-04-03},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  biburl = {https://dblp.org/rec/conf/aaai/ZhangCL0HLC20.bib},
  timestamp = {Tue, 02 Feb 2021 00:00:00 +0100},
}

@inproceedings{zhang2020fast,
  title = {Fast Hardware-Aware Neural Architecture Search},
  author = {Zhang, Li Lyna and Yang, Yuqing and Jiang, Yuhang and Zhu, Wenwu and Liu, Yunxin},
  booktitle = {2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
  publisher = {IEEE},
  doi = {10.1109/cvprw50498.2020.00354},
  url = {https://doi.org/10.1109/cvprw50498.2020.00354},
  source = {Crossref},
  date = {2020-06},
}

@article{zhang2021learning,
  title = {Learning-based Efficient Sparsity and Quantization for Neural Network Compression},
  author = {Zhang, Yi and Yang, Jianlei and Song, Linghao and Shi, Yiyu and Wang, Yu and Xie, Yuan},
  year = {2021},
  journal = {IEEE Transactions on Neural Networks and Learning Systems},
  volume = {32},
  number = {9},
  pages = {3980--3994},
}

@article{zhou2021analognets,
  title = {{AnalogNets}: {ML-HW} Co-Design of Noise-robust {TinyML} Models and Always-On Analog Compute-in-Memory Accelerator},
  author = {Zhou, Chuteng and Redondo, Fernando Garcia and B{\"u}chel, Julian and Boybat, Irem and Comas, Xavier Timoneda and Nandakumar, S. R. and Das, Shidhartha and Sebastian, Abu and Gallo, Manuel Le and Whatmough, Paul N.},
  journal = {arXiv preprint arXiv:2111.06503},
  url = {http://arxiv.org/abs/2111.06503v1},
  date = {2021-11-10},
  eprint = {2111.06503},
  archiveprefix = {arXiv},
  primaryclass = {cs.AR},
}

@article{zhou2021learningnmfinegrainedstructured,
  title = {Learning {N:M} Fine-grained Structured Sparse Neural Networks From Scratch},
  author = {Zhou, Aojun and Ma, Yukun and Zhu, Junnan and Liu, Jianbo and Zhang, Zhijie and Yuan, Kun and Sun, Wenxiu and Li, Hongsheng},
  journal = {arXiv preprint arXiv:2102.04010},
  url = {http://arxiv.org/abs/2102.04010v2},
  date = {2021-02-08},
  eprint = {2102.04010},
  archiveprefix = {arXiv},
  primaryclass = {cs.CV},
}

@inproceedings{Zhu2017,
  title = {Trained Ternary Quantization},
  author = {Zhu, Chenzhuo and Han, Song and Mao, Huizi and Dally, William J.},
  year = {2017},
  booktitle = {International Conference on Learning Representations (ICLR)},
}

@inproceedings{Zoph2017,
  title = {Neural Architecture Search with Reinforcement Learning},
  author = {Zoph, Barret and Le, Quoc V.},
  year = {2017},
  booktitle = {International Conference on Learning Representations},
  internal-note = {NOTE(review): duplicate of zoph2017neural (same work under a different key); consolidate citations to one key when possible},
}

@inproceedings{zoph2017neural,
  title = {Neural Architecture Search with Reinforcement Learning},
  author = {Zoph, Barret and Le, Quoc V.},
  year = {2017},
  booktitle = {International Conference on Learning Representations (ICLR)},
  internal-note = {NOTE(review): duplicate of Zoph2017 (same work under a different key); consolidate citations to one key when possible},
}

@inproceedings{zoph2018learning,
  title = {Learning Transferable Architectures for Scalable Image Recognition},
  author = {Zoph, Barret and Vasudevan, Vijay and Shlens, Jonathon and Le, Quoc V.},
  booktitle = {2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  publisher = {IEEE},
  pages = {8697--8710},
  doi = {10.1109/cvpr.2018.00907},
  url = {https://doi.org/10.1109/cvpr.2018.00907},
  source = {Crossref},
  date = {2018-06},
}