% Conference paper (inproceedings) by Wang and Chen in the CDC 2016 proceedings.
% The citation key is kept exactly as exported so existing \cite commands resolve.
% "Markov" and the acronyms are brace-protected so sentence-casing styles keep
% their capitals. There is deliberately no series field: the proceedings name
% belongs in booktitle only, otherwise styles print it twice.
@inproceedings{9c4daabbe670413d8262df5862c7bb1d,
  author    = {Wang, Mengdi and Chen, Yichen},
  title     = {An online primal-dual method for discounted {Markov} decision processes},
  booktitle = {2016 {IEEE} 55th Conference on Decision and Control, {CDC} 2016},
  pages     = {4516--4521},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2016},
  month     = dec,
  day       = {27},
  doi       = {10.1109/CDC.2016.7798956},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 55th IEEE Conference on Decision and Control, CDC 2016 ; Conference date: 12-12-2016 Through 14-12-2016},
  abstract  = {We consider the online solution of discounted Markov decision processes (MDP). We focus on the black-box learning model where transition probabilities and state transition cost are unknown. Instead, a simulator is available to generate random state transitions under given actions. We propose a stochastic primal-dual algorithm for solving the linear formulation of the Bellman equation. The algorithm updates the primal and dual iterates by using sample state transitions and sample costs generated by the simulator. We provide a thresholding procedure that recovers the exact optimal policy from the dual iterates with high probability.},
}