{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,23]],"date-time":"2024-09-23T04:15:45Z","timestamp":1727064945261},"reference-count":55,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T00:00:00Z","timestamp":1569888000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T00:00:00Z","timestamp":1569888000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,10,1]],"date-time":"2019-10-01T00:00:00Z","timestamp":1569888000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,10]]},"DOI":"10.1109\/iccv.2019.00712","type":"proceedings-article","created":{"date-parts":[[2020,2,28]],"date-time":"2020-02-28T10:27:52Z","timestamp":1582885672000},"source":"Crossref","is-referenced-by-count":165,"title":["Relation Distillation Networks for Video Object Detection"],"prefix":"10.1109","author":[{"given":"Jiajun","family":"Deng","sequence":"first","affiliation":[]},{"given":"Yingwei","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Ting","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Wengang","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Houqiang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Mei","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"NIPS"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"ref33","article-title":"Deepid-net: multi-stage and deformable deep convolutional neural networks for object detection","author":"ouyang","year":"2014","journal-title":"arXiv 1409 3505"},{"key":"ref32","author":"mallat","year":"1999","journal-title":"A Wavelet Tour of Signal Processing"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"ref30","article-title":"Ssd: Single shot multibox detector","author":"liu","year":"2016","journal-title":"ECCV"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.590"},{"key":"ref36","article-title":"Automatic differentiation in pytorch","author":"paszke","year":"2017","journal-title":"Workshop on Machine Learning Systems at NIPS"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.497"},{"key":"ref34","article-title":"Learning deep intrinsic video representation by exploring temporal coherence and graph structure","author":"pan","year":"2016","journal-title":"IJCAI"},{"key":"ref28","article-title":"Microsoft COCO: Common objects in context","author":"lin","year":"2014","journal-title":"ECCV"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"ref29","article-title":"Mobile video object detection with temporally-aware feature maps","author":"liu","year":"2018","journal-title":"CVPR"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01172"},{"key":"ref1","article-title":"Object detection in video with spatiotemporal sampling networks","author":"bertasius","year":"2018","journal-title":"ECCV"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.101"},{"key":"ref22","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"NIPS"},{"key":"ref21","article-title":"T-cnn: Tubelets with convolutional neural networks for object detection from videos","author":"kang","year":"2017","journal-title":"IEEE Trans on CSVT"},{"key":"ref24","article-title":"Recurrent tubelet proposal and recognition networks for action detection","author":"li","year":"2018","journal-title":"ECCV"},{"key":"ref23","article-title":"Cornernet: Detecting objects as paired keypoints","author":"law","year":"2018","journal-title":"ECCV"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"ref25","article-title":"Unified spatio-temporal attention networks for action recognition in videos","author":"li","year":"2018","journal-title":"IEEE Transactions on Multimedia"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.650"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.441"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.52"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00753"},{"key":"ref52","article-title":"Exploring visual relationship for image captioning","author":"yao","year":"2018","journal-title":"ECCV"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298676"},{"key":"ref13","article-title":"Seq-nms for video object detection","author":"han","year":"2016","journal-title":"arXiv 1602 08465"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2389824"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00378"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref4","article-title":"Semantic image segmentation with deep convolutional nets and fully connected crfs","author":"chen","year":"2015","journal-title":"ICLRE"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00644"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.89"},{"key":"ref5","article-title":"R-fcn: Object detection via region-based fully convolutional networks","author":"dai","year":"2016","journal-title":"NIPS"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.330"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.316"},{"key":"ref49","article-title":"Video object detection with an aligned spatial-temporal memory","author":"xiao","year":"2018","journal-title":"ECCV"},{"key":"ref9","article-title":"Convolutional sequence to sequence learning","author":"gehring","year":"2017","journal-title":"ICML"},{"key":"ref46","article-title":"Fully motion-aware network for video object detection","author":"wang","year":"2018","journal-title":"ECCV"},{"key":"ref45","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NIPS"},{"key":"ref48","article-title":"Videos as space-time region graphs","author":"wang","year":"2018","journal-title":"ECCV"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref42","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"NIPS"},{"key":"ref41","article-title":"Overfeat: Integrated recognition, localization and detection using convolutional networks","author":"sermanet","year":"2014","journal-title":"ICLRE"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref43","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"ICLRE"}],"event":{"name":"2019 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Seoul, Korea (South)","start":{"date-parts":[[2019,10,27]]},"end":{"date-parts":[[2019,11,2]]}},"container-title":["2019 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8972782\/9008105\/09008824.pdf?arnumber=9008824","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T21:52:04Z","timestamp":1658094724000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9008824\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,10]]},"references-count":55,"URL":"https:\/\/doi.org\/10.1109\/iccv.2019.00712","relation":{},"subject":[],"published":{"date-parts":[[2019,10]]}}}