cs249r_book/contents/data_engineering/data_engineering.bib

@article{aledhari2020federated,
 author = {Aledhari, Mohammed and Razzak, Rehma and Parizi, Reza M. and Saeed, Fahad},
 bdsk-url-1 = {https://doi.org/10.1109/access.2020.3013541},
 doi = {10.1109/access.2020.3013541},
 issn = {2169-3536},
 journal = {IEEE Access},
 pages = {140699--140725},
 publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
 source = {Crossref},
 title = {Federated Learning: {A} Survey on Enabling Technologies, Protocols, and Applications},
 url = {https://doi.org/10.1109/access.2020.3013541},
 volume = {8},
 year = {2020}
}

@inproceedings{ardila2020common,
 address = {Marseille, France},
 author = {Ardila, Rosana and Branson, Megan and Davis, Kelly and Kohler, Michael and Meyer, Josh and Henretty, Michael and Morais, Reuben and Saunders, Lindsay and Tyers, Francis and Weber, Gregor},
 booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
 isbn = {979-10-95546-34-4},
 language = {English},
 pages = {4218--4222},
 publisher = {European Language Resources Association},
 title = {{Common Voice}: {A} Massively-Multilingual Speech Corpus},
 url = {https://aclanthology.org/2020.lrec-1.520},
 year = {2020}
}

@article{bender2018data,
 address = {Cambridge, MA},
 author = {Bender, Emily M. and Friedman, Batya},
 doi = {10.1162/tacl_a_00041},
 journal = {Transactions of the Association for Computational Linguistics},
 pages = {587--604},
 publisher = {MIT Press},
 title = {Data Statements for Natural Language Processing: Toward Mitigating System Bias and Enabling Better Science},
 url = {https://aclanthology.org/Q18-1041},
 volume = {6},
 year = {2018}
}

@article{chapelle2009semisupervised,
 author = {Chapelle, O. and Sch{\"o}lkopf, B. and Zien, A.},
 bdsk-url-1 = {https://doi.org/10.1109/tnn.2009.2015974},
 doi = {10.1109/tnn.2009.2015974},
 issn = {1045-9227},
 journal = {IEEE Trans. Neural Networks},
 number = {3},
 pages = {542},
 publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
 source = {Crossref},
 title = {Semi-Supervised Learning ({Chapelle}, {O.} et al., Eds.; 2006) [{Book} reviews]},
 url = {https://doi.org/10.1109/tnn.2009.2015974},
 volume = {20},
 year = {2009}
}

@article{gebru2021datasheets,
 author = {Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daum{\'e}, III, Hal and Crawford, Kate},
 bdsk-url-1 = {https://doi.org/10.1145/3458723},
 doi = {10.1145/3458723},
 issn = {0001-0782, 1557-7317},
 journal = {Commun. ACM},
 number = {12},
 pages = {86--92},
 publisher = {Association for Computing Machinery (ACM)},
 source = {Crossref},
 title = {Datasheets for datasets},
 url = {https://doi.org/10.1145/3458723},
 volume = {64},
 year = {2021}
}

@misc{googleinformation,
  author     = {Google},
  title      = {Information quality content moderation},
  url        = {https://blog.google/documents/83/},
  bdsk-url-1 = {https://blog.google/documents/83/}
}

@incollection{holland2020dataset,
 author = {Holland, Sarah and Hosny, Ahmed and Newman, Sarah and Joseph, Joshua and Chmielinski, Kasia},
 bdsk-url-1 = {https://doi.org/10.5040/9781509932771.ch-001},
 booktitle = {Data Protection and Privacy},
 doi = {10.5040/9781509932771.ch-001},
 isbn = {9781509932740, 9781509932764, 9781509932757, 9781509932771},
 publisher = {Hart Publishing},
 source = {Crossref},
 title = {The Dataset Nutrition Label: {A} Framework to Drive Higher Data Quality Standards},
 url = {https://doi.org/10.5040/9781509932771.ch-001},
 year = {2020}
}

@inproceedings{johnsonroberson2017driving,
 author = {Johnson-Roberson, Matthew and Barto, Charles and Mehta, Rounak and Sridhar, Sharath Nittur and Rosaen, Karl and Vasudevan, Ram},
 bdsk-url-1 = {https://doi.org/10.1109/icra.2017.7989092},
 booktitle = {2017 IEEE International Conference on Robotics and Automation (ICRA)},
 doi = {10.1109/icra.2017.7989092},
 publisher = {IEEE},
 source = {Crossref},
 title = {Driving in the Matrix: {Can} virtual worlds replace human-generated annotations for real world tasks?},
 url = {https://doi.org/10.1109/icra.2017.7989092},
 year = {2017}
}

@article{krishnan2022selfsupervised,
  author     = {Krishnan, Rayan and Rajpurkar, Pranav and Topol, Eric J.},
  title      = {Self-supervised learning in medicine and healthcare},
  journal    = {Nat. Biomed. Eng.},
  volume     = {6},
  number     = {12},
  pages      = {1346--1352},
  year       = {2022},
  publisher  = {Springer Science and Business Media LLC},
  issn       = {2157-846X},
  doi        = {10.1038/s41551-022-00914-1},
  url        = {https://doi.org/10.1038/s41551-022-00914-1},
  bdsk-url-1 = {https://doi.org/10.1038/s41551-022-00914-1},
  source     = {Crossref}
}

@article{northcutt2021pervasive,
 archiveprefix = {arXiv},
 author = {Northcutt, Curtis G. and Athalye, Anish and Mueller, Jonas},
 bdsk-url-1 = {https://doi.org/10.48550/arXiv.2103.14749},
 doi = {10.48550/arXiv.2103.14749},
 eprint = {2103.14749},
 journal = {arXiv},
 title = {Pervasive Label Errors in Test Sets Destabilize Machine Learning Benchmarks},
 url = {https://doi.org/10.48550/arXiv.2103.14749},
 year = {2021}
}

@inproceedings{pushkarna2022data,
 author = {Pushkarna, Mahima and Zaldivar, Andrew and Kjartansson, Oddur},
 bdsk-url-1 = {https://doi.org/10.1145/3531146.3533231},
 booktitle = {2022 ACM Conference on Fairness, Accountability, and Transparency},
 doi = {10.1145/3531146.3533231},
 publisher = {ACM},
 source = {Crossref},
 title = {Data Cards: {Purposeful} and Transparent Dataset Documentation for Responsible {AI}},
 url = {https://doi.org/10.1145/3531146.3533231},
 year = {2022}
}

@inproceedings{ratner2018snorkel,
 author = {Ratner, Alex and Hancock, Braden and Dunnmon, Jared and Goldman, Roger and R{\'e}, Christopher},
 booktitle = {Proceedings of the Second Workshop on Data Management for End-To-End Machine Learning},
 doi = {10.1145/3209889.3209898},
 publisher = {ACM},
 source = {Crossref},
 title = {Snorkel {MeTaL}: {Weak} Supervision for Multi-Task Learning},
 url = {https://doi.org/10.1145/3209889.3209898},
 year = {2018}
}

@inproceedings{victor2019machine,
 author = {Sheng, Victor S. and Zhang, Jing},
 bibsource = {dblp computer science bibliography, https://dblp.org},
 biburl = {https://dblp.org/rec/conf/aaai/Sheng019.bib},
 booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, USA, January 27 -- February 1, 2019},
 doi = {10.1609/aaai.v33i01.33019837},
 pages = {9837--9843},
 publisher = {{AAAI} Press},
 timestamp = {Wed, 25 Sep 2019 01:00:00 +0200},
 title = {Machine Learning with Crowdsourcing: {A} Brief Summary of the Past Research and Future Directions},
 url = {https://doi.org/10.1609/aaai.v33i01.33019837},
 year = {2019}
}