@inproceedings{4e995f4556124551ad69b1f7d0c5a0b9,
title = "Attention-Driven Training-Free Efficiency Enhancement of Diffusion Models",
abstract = "Diffusion models (DMs) have exhibited superior performance in generating high-quality and diverse images. However, this exceptional performance comes at the cost of expensive generation process, particularly due to the heavily used attention module in leading models. Existing works mainly adopt a retraining process to enhance DM efficiency. This is computationally expensive and not very scalable. To this end, we introduce the Attention-driven Training-free Efficient Diffusion Model (AT-EDM) framework that leverages attention maps to perform run-time pruning of redundant tokens, without the need for any retraining. Specifically, for single-denoising-step pruning, we develop a novel ranking algorithm, Generalized Weighted Page Rank (G-WPR), to identify redundant tokens, and a similarity-based recovery method to restore tokens for the convolution operation. In addition, we propose a Denoising-Steps-Aware Pruning (DSAP) approach to adjust the pruning budget across different denoising timesteps for better generation quality. Extensive evaluations show that AT-EDM performs favorably against prior art in terms of efficiency (e.g., 38.8\% FLOPs saving and up to 1.53× speed-up over Stable Diffusion XL) while maintaining nearly the same FID and CLIP scores as the full model. Project webpage: https://atedm.github.io.",
keywords = "attention map, diffusion model, efficiency, token pruning, training-free",
author = "Hongjie Wang and Difan Liu and Yan Kang and Yijun Li and Zhe Lin and Jha, \{Niraj K.\} and Yuchen Liu",
note = "Publisher Copyright: {\textcopyright} 2024 IEEE.; 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2024 ; Conference date: 16-06-2024 Through 22-06-2024",
year = "2024",
doi = "10.1109/CVPR52733.2024.01522",
language = "English (US)",
isbn = "9798350353006",
series = "Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition",
publisher = "IEEE Computer Society",
pages = "16080--16089",
booktitle = "Proceedings - 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2024",
address = "United States",
}