% PG3 paper (Yang et al.), IJCAI 2022 main-track proceedings entry.
% Cleaned from an auto-exported record:
%   - editor was listed twice and used escaped braces (literal \{ \}) instead of
%     grouping braces around the compound surname;
%   - abstract expanded the acronym inconsistently with the title;
%   - the acronym in the title is now brace-protected against style recasing.
% NOTE(review): author names are kept as exported (no diacritics or middle
% names added) -- confirm spelling against the published paper.
@inproceedings{77abd3fa53b249469f79728d1e754be2,
title = "{PG3}: Policy-Guided Planning for Generalized Policy Generation",
abstract = "A longstanding objective in classical planning is to synthesize policies that generalize across multiple problems from the same domain. In this work, we study generalized policy search-based methods with a focus on the score function used to guide the search over policies. We demonstrate limitations of two score functions - policy evaluation and plan comparison - and propose a new approach that overcomes these limitations. The main idea behind our approach, Policy-Guided Planning for Generalized Policy Generation (PG3), is that a candidate policy should be used to guide planning on training problems as a mechanism for evaluating that candidate. Theoretical results in a simplified setting give conditions under which PG3 is optimal or admissible. We then study a specific instantiation of policy search where planning problems are PDDL-based and policies are lifted decision lists. Empirical results in six domains confirm that PG3 learns generalized policies more efficiently and effectively than several baselines.",
author = "Yang, Ryan and Silver, Tom and Curtis, Aidan and Lozano-Perez, Tomas and Kaelbling, Leslie",
note = "Publisher Copyright: {\textcopyright} 2022 International Joint Conferences on Artificial Intelligence. All rights reserved.; 31st International Joint Conference on Artificial Intelligence, IJCAI 2022 ; Conference date: 23-07-2022 Through 29-07-2022",
year = "2022",
doi = "10.24963/ijcai.2022/650",
language = "English (US)",
series = "IJCAI International Joint Conference on Artificial Intelligence",
publisher = "International Joint Conferences on Artificial Intelligence",
pages = "4686--4692",
editor = "{De Raedt}, Luc",
booktitle = "Proceedings of the 31st International Joint Conference on Artificial Intelligence, IJCAI 2022",
}