% Conference-paper record for the TAL-Net paper (CVPR 2018 proceedings).
% The citation key is kept exactly as exported so existing citations keep resolving.
% NOTE(review): month/day (14 Dec) differ from the conference dates given in the
%   note field (18-22 June 2018); presumably the online publication date -- confirm.
% NOTE(review): address holds a country, not a publisher city -- confirm or replace.
% NOTE(review): the second author's given name is abbreviated ("S.") in the export;
%   expand it to the full given name once verified against the paper.
@inproceedings{4dbb1a1874ce4d01b314a9ee6d8e6ac7,
  author    = {Chao, {Yu Wei} and Vijayanarasimhan, S. and Seybold, Bryan and Ross, {David A.} and Deng, Jia and Sukthankar, Rahul},
  title     = {Rethinking the {Faster R-CNN} Architecture for Temporal Action Localization},
  booktitle = {Proceedings - 2018 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2018},
  series    = {Proceedings of the {IEEE} Computer Society Conference on Computer Vision and Pattern Recognition},
  publisher = {IEEE Computer Society},
  address   = {United States},
  pages     = {1130--1139},
  year      = {2018},
  month     = dec,
  day       = {14},
  doi       = {10.1109/CVPR.2018.00124},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 31st Meeting of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2018 ; Conference date: 18-06-2018 Through 22-06-2018},
  abstract  = {We propose TAL-Net, an improved approach to temporal action localization in video that is inspired by the Faster R-CNN object detection framework. TAL-Net addresses three key shortcomings of existing approaches: (1) we improve receptive field alignment using a multi-scale architecture that can accommodate extreme variation in action durations; (2) we better exploit the temporal context of actions for both proposal generation and action classification by appropriately extending receptive fields; and (3) we explicitly consider multi-stream feature fusion and demonstrate that fusing motion late is important. We achieve state-of-the-art performance for both action proposal and localization on THUMOS'14 detection benchmark and competitive performance on ActivityNet challenge.},
}