% Conference paper introducing DSWP+ (Decoupled Software Pipelining used as an
% enabling transformation), published in the CGO 2010 proceedings.
% Maintenance notes:
%   - The citation key is the exporter-generated identifier; it is kept as-is
%     so existing \cite commands continue to resolve.
%   - The exported "series" field was an exact copy of "booktitle" and has been
%     dropped so styles do not print the proceedings title twice.
%   - "month" is taken from the conference start date recorded in "note".
%   - Author names use the unambiguous "Last, First" form throughout.
@inproceedings{171404033a25437b8f5350821e4a05f1,
  author    = {Huang, Jialu and Raman, Arun and Jablin, Thomas B. and Zhang, Yun and Hung, Tzu Han and August, David I.},
  title     = {Decoupled software pipelining creates parallelization opportunities},
  booktitle = {Proceedings of the 2010 {CGO} - The 8th International Symposium on Code Generation and Optimization},
  pages     = {121--130},
  year      = {2010},
  month     = apr,
  doi       = {10.1145/1772954.1772973},
  isbn      = {9781605586359},
  language  = {English (US)},
  keywords  = {DSWP, enabling transformation, multicore, parallelization, speculation},
  abstract  = {Decoupled Software Pipelining (DSWP) is one approach to automatically extract threads from loops. It partitions loops into long-running threads that communicate in a pipelined manner via inter-core queues. This work recognizes that DSWP can also be an enabling transformation for other loop parallelization techniques. This use of DSWP, called DSWP+, splits a loop into new loops with dependence patterns amenable to parallelization using techniques that were originally either inapplicable or poorly-performing. By parallelizing each stage of the DSWP+ pipeline using (potentially) different techniques, not only is the benefit of DSWP increased, but the applicability and performance of other parallelization techniques are enhanced. This paper evaluates DSWP+ as an enabling framework for other transformations by applying it in conjunction with DOALL, LOCALWRITE, and SpecDOALL to individual stages of the pipeline. This paper demonstrates significant performance gains on a commodity 8-core multicore machine running a variety of codes transformed with DSWP+.},
  note      = {8th International Symposium on Code Generation and Optimization, CGO 2010 ; Conference date: 24-04-2010 Through 28-04-2010},
}