@comment{Conference paper (IEEE CLUSTER 2018) on OpenACC vs native programming of GTC-P on Sunway TaihuLight. Case-sensitive tokens in the title are brace-protected; the two-word given name of the second author is kept together with a brace group.}
@inproceedings{679eb2b45b084a828c563942cac1140f,
  author    = {Cai, Linjin and Wang, {Yi Chao} and Tang, William and Wang, Bei and Ethier, Stephane and Liu, Zhao and Lin, James},
  title     = {{OpenACC} vs the Native Programming on {Sunway TaihuLight}: A Case Study with {GTC-P}},
  booktitle = {Proceedings - 2018 IEEE International Conference on Cluster Computing, CLUSTER 2018},
  series    = {Proceedings - IEEE International Conference on Cluster Computing, ICCC},
  pages     = {88--97},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2018},
  month     = oct,
  day       = {29},
  doi       = {10.1109/CLUSTER.2018.00021},
  language  = {English (US)},
  keywords  = {GTC-P, OpenACC, Optimization, Sunway TaihuLight},
  abstract  = {Sunway TaihuLight is China's recent top-ranked supercomputer worldwide that was the first to be built entirely with home-grown processors. This supercomputer can be programmed with two approaches: directive-based OpenACC and native programming. These approaches are studied here using GTC-P, a particle-in-cell code for investigating micro-turbulence in magnetic fusion plasmas. We have compared the performance and programming efforts between the OpenACC and the native version of GTC-P. Associated results show that in the OpenACC version, the kernel with irregular memory access becomes the main performance bottleneck due to poor data locality. To address this issue, we have applied two optimizations on the native version: (1) register level communication (RLC); and (2) an 'asynchronization' strategy. With these two optimizations, the native version can achieve up to 2.5X speedup for the memory-bound kernel compared with the OpenACC version. In addition, we have now scaled GTC-P on 4,259,840 cores of TaihuLight and demonstrate performance comparisons with several world-leading supercomputers.},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 2018 IEEE International Conference on Cluster Computing, CLUSTER 2018 ; Conference date: 10-09-2018 Through 13-09-2018},
}