% Conference paper (ICML 2014): Agarwal, Hsu, Kale, Langford, Li, Schapire.
% The title is stored in Title Case so that styles can downcase it as needed.
% The venue is recorded once, in booktitle.
% NOTE(review): the page range is kept exactly as exported; confirm it against
% the published proceedings before relying on it.
@inproceedings{1d1e0eb719d24f3c9de14212d08c9dde,
  author    = {Agarwal, Alekh and Hsu, Daniel and Kale, Satyen and Langford, John and Li, Lihong and Schapire, Robert E.},
  title     = {Taming the Monster: A Fast and Simple Algorithm for Contextual Bandits},
  booktitle = {31st International Conference on Machine Learning, ICML 2014},
  publisher = {International Machine Learning Society (IMLS)},
  year      = {2014},
  pages     = {3611--3619},
  language  = {English (US)},
  note      = {Publisher Copyright: Copyright 2014 by the author(s).; 31st International Conference on Machine Learning, ICML 2014 ; Conference date: 21-06-2014 Through 26-06-2014},
  abstract  = {We present a new algorithm for the contextual bandit learning problem, where the learner repeatedly takes one of K actions in response to the observed context, and observes the reward only for that action. Our method assumes access to an oracle for solving fully supervised cost-sensitive classification problems and achieves the statistically optimal regret guarantee with only $\tilde{O}(\sqrt{KT})$ oracle calls across all T rounds. By doing so, we obtain the most practical contextual bandit learning algorithm amongst approaches that work for general policy classes. We conduct a proof-of-concept experiment which demonstrates the excellent computational and statistical performance of (an online variant of) our algorithm relative to several strong baselines.},
}