@inproceedings{d1956712b548473dbb70f2f022fed610,
  title     = {Apprenticeship Learning Using Linear Programming},
  abstract  = {In apprenticeship learning, the goal is to learn a policy in a {Markov} decision process that is at least as good as a policy demonstrated by an expert. The difficulty arises in that the MDP's true reward function is assumed to be unknown. We show how to frame apprenticeship learning as a linear programming problem, and show that using an off-the-shelf LP solver to solve this problem results in a substantial improvement in running time over existing methods --- up to two orders of magnitude faster in our experiments. Additionally, our approach produces stationary policies, while all existing methods for apprenticeship learning output policies that are ``mixed'', i.e. randomized combinations of stationary policies. The technique used is general enough to convert any mixed policy to a stationary policy.},
  author    = {Syed, Umar and Bowling, Michael and Schapire, Robert E.},
  year      = {2008},
  doi       = {10.1145/1390156.1390286},
  language  = {English (US)},
  isbn      = {9781605582054},
  series    = {ICML '08},
  publisher = {Association for Computing Machinery (ACM)},
  pages     = {1032--1039},
  booktitle = {Proceedings of the 25th International Conference on Machine Learning},
  address   = {New York, NY, USA},
  note      = {25th International Conference on Machine Learning; Conference date: 05-07-2008 Through 09-07-2008},
}