% Conference paper: OpenACC port of GTC-P (PDCAT 2016 proceedings, pp. 13--18).
% NOTE(review): month/day below (2 Jul) precede the conference dates given in
% the note field (16--18 Dec 2016); confirm the intended publication date.
% NOTE(review): address holds a country, not a publisher city; confirm.
@inproceedings{f9af9bb1020d4846bce007e43665f41e,
title = "Performance and portability studies with {OpenACC} accelerated version of {GTC-P}",
abstract = "Accelerator-based heterogeneous computing is of paramount importance to High Performance Computing. The increasing complexity of the cluster architectures requires more generic, high-level programming models. OpenACC is a directive-based parallel programming model, which provides performance on and portability across a wide variety of platforms, including GPU, multicore CPU, and many-core processors. GTC-P is a discovery-science-capable real-world application code based on the Particle-In-Cell (PIC) algorithm that is well-established in the HPC area. Several native versions of GTC-P have been developed for supercomputers on TOP500 with different architectures, including Titan, Mira, etc. Motivated by the state-of-the-art portability, we implemented the first OpenACC version of GTC-P and evaluated its performance portability across NVIDIA GPUs, Intel x86 and OpenPOWER CPUs. In this paper, we also proposed two key optimization methods for OpenACC implementation of PIC algorithm on multicore CPU and GPU including removing atomic operation and taking advantage of shared memory. OpenACC shows both impressive productivity and performance in a perspective of portability and scalability. The OpenACC version achieves more than 90\% performance compared with the native versions with only about 300 LOC.",
keywords = "CUDA, GPU, GTC-P, Gyrokinetic PIC code, OpenACC, OpenPOWER",
author = "Wei, Yueming and Wang, Yichao and Cai, Linjin and Tang, William and Wang, Bei and Ethier, Stephane and See, Simon and Lin, James",
note = "Publisher Copyright: {\textcopyright} 2016 IEEE.; 17th International Conference on Parallel and Distributed Computing, Applications and Technologies, PDCAT 2016 ; Conference date: 16-12-2016 Through 18-12-2016",
year = "2016",
month = jul,
day = "2",
doi = "10.1109/PDCAT.2016.019",
language = "English (US)",
series = "Parallel and Distributed Computing, Applications and Technologies, PDCAT Proceedings",
publisher = "IEEE Computer Society",
pages = "13--18",
editor = "Shen, Hong and Sang, Yingpeng and Tian, Hui",
booktitle = "Proceedings - 17th International Conference on Parallel and Distributed Computing, Applications and Technologies, PDCAT 2016",
address = "United States",
}