% Conference paper record (AAAI 2023), cleaned up from an auto-generated export.
% NOTE(review): the doi suffix below looks malformed (unusually long for this
% DOI pattern) -- confirm that it resolves on doi.org before relying on it.
% The value is kept exactly as exported.
% The publisher copyright / conference-date text lives in `annote` (not printed
% by standard styles) so that it does not leak into rendered references.
@inproceedings{9ad38b7cfca74230aff96d0636d64e5a,
  author    = {Chugg, Ben and Henderson, Peter and Goldin, Jacob and Ho, Daniel E.},
  title     = {Entropy Regularization for Population Estimation},
  editor    = {Williams, Brian and Chen, Yiling and Neville, Jennifer},
  booktitle = {Proceedings of the 37th {AAAI} Conference on Artificial Intelligence, {AAAI} 2023},
  series    = {{AAAI-23} Technical Tracks 10},
  pages     = {12198--12204},
  publisher = {{AAAI} Press},
  year      = {2023},
  month     = jun,
  day       = {27},
  doi       = {10.1609/aaai.v37i10.2636126438},
  language  = {English (US)},
  abstract  = {Entropy regularization is known to improve exploration in sequential decision-making problems. We show that this same mechanism can also lead to nearly unbiased and lower-variance estimates of the mean reward in the optimize-and-estimate structured bandit setting. Mean reward estimation (i.e., population estimation) tasks have recently been shown to be essential for public policy settings where legal constraints often require precise estimates of population metrics. We show that leveraging entropy and KL divergence can yield a better trade-off between reward and estimator variance than existing baselines, all while remaining nearly unbiased. These properties of entropy regularization illustrate an exciting potential for bridging the optimal exploration and estimation literatures.},
  annote    = {Publisher Copyright: Copyright {\textcopyright} 2023, Association for the Advancement of Artificial Intelligence (www.aaai.org). All rights reserved.; 37th AAAI Conference on Artificial Intelligence, AAAI 2023 ; Conference date: 07-02-2023 Through 14-02-2023},
}