% Conference paper in the CSLAW 2024 proceedings (ACM), pages 109--120.
% Event details are stored in eventtitle/eventdate rather than in a printed note.
% The publisher copyright line sits in the copyright field, which the standard
% styles do not print. No series field is given: the proceedings title belongs
% in booktitle only, otherwise styles that print series show it twice.
@inproceedings{ddb78c45e15a4b36be7c7a88b8fff366,
  author     = {Henderson, Peter and Hu, Jieru and Diab, Mona and Pineau, Joelle},
  title      = {Rethinking Machine Learning Benchmarks in the Context of Professional Codes of Conduct},
  booktitle  = {{CSLAW} 2024 -- Proceedings of the 3rd Symposium on Computer Science and Law},
  eventtitle = {3rd Symposium on Computer Science and Law, {CSLAW} 2024},
  eventdate  = {2024-03-12/2024-03-13},
  publisher  = {Association for Computing Machinery, Inc},
  pages      = {109--120},
  year       = {2024},
  month      = mar,
  day        = {12},
  doi        = {10.1145/3614407.3643708},
  language   = {English (US)},
  keywords   = {AI \& Law, AI \& Society, Benchmarking, Evaluation, Machine Translation},
  copyright  = {{\textcopyright} 2024 Owner/Author.},
  abstract   = {Benchmarking efforts for machine learning have often mimicked (or even explicitly used) professional licensing exams to assess capabilities in a given area, focusing primarily on accuracy as the metric of choice. However, this approach neglects a variety of essential skills required in professional settings. We propose that professional codes of conduct and rules can guide machine learning researchers to address potential gaps in benchmark construction. These guidelines frequently account for situations professionals may encounter and must handle with care. A model may excel on an exam but still fall short in critical scenarios, deemed unacceptable under professional codes or rules. To motivate this idea, we conduct a case study and comparative examination of machine translation in legal settings. We point out several areas where standard deployments and benchmarks do not assess key requirements under professional rules. We suggest further refinements that would bring the two closer together, including requiring a measurement of uncertainty so that models opt out of uncertain translations. We then share broader insights on constructing and deploying foundation models, particularly in critical domains like law and legal translation.},
}