@inproceedings{d1956712b548473dbb70f2f022fed610,
  title     = {Apprenticeship Learning Using Linear Programming},
  author    = {Syed, Umar and Bowling, Michael and Schapire, Robert E.},
  booktitle = {Proceedings of the 25th International Conference on Machine Learning},
  series    = {ICML '08},
  year      = {2008},
  month     = jul,
  pages     = {1032--1039},
  publisher = {Association for Computing Machinery (ACM)},
  address   = {New York, NY, USA},
  isbn      = {9781605582054},
  doi       = {10.1145/1390156.1390286},
  language  = {English (US)},
  abstract  = {In apprenticeship learning, the goal is to learn a policy in a Markov decision process that is at least as good as a policy demonstrated by an expert. The difficulty arises in that the MDP's true reward function is assumed to be unknown. We show how to frame apprenticeship learning as a linear programming problem, and show that using an off-the-shelf LP solver to solve this problem results in a substantial improvement in running time over existing methods - up to two orders of magnitude faster in our experiments. Additionally, our approach produces stationary policies, while all existing methods for apprenticeship learning output policies that are ``mixed'', i.e. randomized combinations of stationary policies. The technique used is general enough to convert any mixed policy to a stationary policy.},
  note      = {25th International Conference on Machine Learning ; Conference date: 05-07-2008 Through 09-07-2008},
}