@inproceedings{3317b9b5cd13404e9a43191fd6ae0a79,
title = "Optiongan: Learning joint reward-policy options using generative adversarial inverse reinforcement learning",
abstract = "Reinforcement learning has shown promise in learning policies that can solve complex problems. However, manually specifying a good reward function can be difficult, especially for intricate tasks. Inverse reinforcement learning offers a useful paradigm to learn the underlying reward function directly from expert demonstrations. Yet in reality, the corpus of demonstrations may contain trajectories arising from a diverse set of underlying reward functions rather than a single one. Thus, in inverse reinforcement learning, it is useful to consider such a decomposition. The options framework in reinforcement learning is specifically designed to decompose policies in a similar light. We therefore extend the options framework and propose a method to simultaneously recover reward options in addition to policy options. We leverage adversarial methods to learn joint reward-policy options using only observed expert states. We show that this approach works well in both simple and complex continuous control tasks and shows significant performance increases in one-shot transfer learning.",
author = "Peter Henderson and Chang, {Wei Di} and Bacon, {Pierre Luc} and David Meger and Joelle Pineau and Doina Precup",
note = "Publisher Copyright: Copyright {\textcopyright} 2018, Association for the Advancement of Artificial Intelligence (www.aaai.org). All rights reserved.; 32nd AAAI Conference on Artificial Intelligence, AAAI 2018 ; Conference date: 02-02-2018 Through 07-02-2018",
year = "2018",
language = "English (US)",
series = "32nd AAAI Conference on Artificial Intelligence, AAAI 2018",
publisher = "AAAI press",
pages = "3199--3206",
booktitle = "32nd AAAI Conference on Artificial Intelligence, AAAI 2018",
}