{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T09:49:07Z","timestamp":1743846547638,"version":"3.37.3"},"reference-count":153,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. on Image Process."],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/tip.2020.3016485","type":"journal-article","created":{"date-parts":[[2020,8,28]],"date-time":"2020-08-28T20:08:36Z","timestamp":1598645316000},"page":"8680-8695","source":"Crossref","is-referenced-by-count":169,"title":["Video Coding for Machines: A Paradigm of Collaborative Compression and Intelligent Analytics"],"prefix":"10.1109","volume":"29","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4491-2023","authenticated-orcid":false,"given":"Lingyu","family":"Duan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0468-9576","authenticated-orcid":false,"given":"Jiaying","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1692-0069","authenticated-orcid":false,"given":"Wenhan","family":"Yang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4234-6099","authenticated-orcid":false,"given":"Tiejun","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Wen","family":"Gao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2018.8451465"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2934565"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2601264"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/IVMSPW.2016.7528223"},{"key":"ref31","article-title":"Density modeling of images using a generalized normalization transformation","author":"ball\u00e9","year":"2015","journal-title":"arXiv 1511 06281"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/79.733497"},{"key":"ref37","article-title":"Non-local attention optimized deep image compression","author":"liu","year":"2019","journal-title":"arXiv 1904 09757"},{"key":"ref36","first-page":"1","article-title":"Practical stacked non-local attention modules for image compression","author":"liu","year":"2019","journal-title":"Proc CVPRW"},{"key":"ref35","article-title":"Variable rate image compression with recurrent neural networks","author":"toderici","year":"2015","journal-title":"arXiv 1511 06085"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.577"},{"year":"2003","key":"ref28"},{"year":"1994","key":"ref27"},{"year":"2013","key":"ref29"},{"key":"ref20","first-page":"1","article-title":"Review of development of visual neural computing","volume":"1","author":"xu","year":"2017","journal-title":"Comput Eng Appl"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1117\/12.965361"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TCOM.1974.1092258"},{"year":"1995","key":"ref24"},{"year":"1990","key":"ref23"},{"key":"ref101","first-page":"217","article-title":"Learning what and where to draw","author":"reed","year":"2016","journal-title":"Proc NIPS"},{"year":"1999","key":"ref26"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00899"},{"year":"1993","key":"ref25"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2013.46"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/DCC.2009.33"},{"key":"ref153","first-page":"1","article-title":"Towards coding for human and machine vision: A scalable image coding approach","author":"hu","year":"2020","journal-title":"Proc IEEE Int Conf Multimedia Expo (ICME)"},{"key":"ref150","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"ref152","doi-asserted-by":"publisher","DOI":"10.1109\/ICME46284.2020.9102843"},{"key":"ref151","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2017.8273580"},{"key":"ref146","doi-asserted-by":"publisher","DOI":"10.1109\/DCC.2019.00084"},{"key":"ref147","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2873102"},{"key":"ref148","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2745203"},{"key":"ref149","doi-asserted-by":"publisher","DOI":"10.1145\/3365212"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2856628"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2012.11.002"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2018.2818012"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.15224\/978-1-63248-061-3-23"},{"year":"2013","key":"ref55"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2794203"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-011-0472-9"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/2072298.2072372"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP.2018.8698740"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/DCC.2018.00021"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2014.2371951"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2018.8451286"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2882923"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2920603"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-011-0453-z"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2016.2633377"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2019.2961504"},{"year":"2015","key":"ref46"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/76.927426"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2011.940881"},{"year":"2019","key":"ref47"},{"key":"ref42","article-title":"End-to-end optimized image compression","author":"ball\u00e9","year":"2016","journal-title":"arXiv 1611 01704"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/PCS.2016.7906310"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00339"},{"key":"ref43","article-title":"Variational image compression with a scale hyperprior","author":"ball\u00e9","year":"2018","journal-title":"arXiv 1802 01436"},{"key":"ref127","article-title":"Learned scalable image compression with bidirectional context disentanglement network","author":"zhang","year":"2018","journal-title":"arXiv 1812 09443"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2007.905532"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1109\/79.952804"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/76.911157"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2019.00012"},{"year":"2019","key":"ref72"},{"key":"ref129","article-title":"Learned video compression via joint spatial-temporal correlation exploration","author":"liu","year":"2019","journal-title":"arXiv 1912 06348"},{"year":"2019","key":"ref71"},{"key":"ref128","doi-asserted-by":"publisher","DOI":"10.1109\/PCS48520.2019.8954536"},{"year":"2019","key":"ref70"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1360\/N112018-00025"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/MMSP.2018.8547134"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2904360"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP47243.2019.8965808"},{"key":"ref133","article-title":"An emerging coding paradigm VCM: A scalable coding approach beyond feature and signal","author":"xia","year":"2020","journal-title":"arXiv 2001 03004"},{"key":"ref134","article-title":"Towards coding for human and machine vision: A scalable image coding approach","author":"hu","year":"2020","journal-title":"arXiv 2001 02915"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803255"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.81"},{"key":"ref132","article-title":"Intermediate deep feature compression: Toward intelligent sensing","author":"chen","year":"2019","journal-title":"IEEE Trans Image Process"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref136","doi-asserted-by":"publisher","DOI":"10.1145\/3132734.3132739"},{"key":"ref135","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350849"},{"journal-title":"Lempel Ziv Markov chain algorithm","year":"2020","author":"pavlov","key":"ref138"},{"key":"ref137","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00248"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2013.2296532"},{"key":"ref139","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/871"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1117\/12.935619"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.4108\/ICST.MOBIMEDIA2009.7406"},{"year":"2011","key":"ref63"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"ref140","article-title":"Deep generative image models using a Laplacian pyramid of adversarial networks","author":"denton","year":"2015","journal-title":"arXiv 1506 05751"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2713410"},{"key":"ref141","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2865304"},{"key":"ref66","first-page":"1","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"Proc ICLR"},{"key":"ref142","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref67","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc NIPS"},{"key":"ref143","article-title":"YOLOv3: An incremental improvement","author":"redmon","year":"2018","journal-title":"arXiv 1804 02767"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref144","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2018.2872984"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2012.2221191"},{"year":"2019","key":"ref69"},{"key":"ref145","doi-asserted-by":"publisher","DOI":"10.1109\/DCC.2019.00080"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"560","DOI":"10.1109\/TCSVT.2003.815165","article-title":"overview of the h.264\/avc video coding standard","volume":"13","author":"wiegand","year":"2003","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1145\/3126686.3126737"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.310"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.5194\/isprs-archives-XLII-2-W16-3-2019"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.244"},{"key":"ref107","article-title":"Deep multi-scale video prediction beyond mean square error","author":"mathieu","year":"2015","journal-title":"arXiv 1511 05440"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"ref106","article-title":"Deep predictive coding networks for video prediction and unsupervised learning","author":"lotter","year":"2016","journal-title":"arXiv 1605 08104"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"ref105","first-page":"64","article-title":"Unsupervised learning for physical interaction through video prediction","author":"finn","year":"2016","journal-title":"Proc NIPS"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00070"},{"key":"ref104","first-page":"843","article-title":"Unsupervised learning of video representations using LSTMs","author":"srivastava","year":"2015","journal-title":"Proc ICML"},{"key":"ref90","article-title":"Progressive growing of GANs for improved quality, stability, and variation","author":"karras","year":"2017","journal-title":"arXiv 1710 10196"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00923"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00018"},{"key":"ref111","article-title":"Transformation-based models of video sequences","author":"van amersfoort","year":"2017","journal-title":"arXiv 1701 08435"},{"key":"ref112","first-page":"613","article-title":"Generating videos with scene dynamics","author":"vondrick","year":"2016","journal-title":"Proc NIPS"},{"key":"ref110","first-page":"667","article-title":"Dynamic filter networks","author":"de brabandere","year":"2016","journal-title":"Proc NIPS"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00359"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00870"},{"key":"ref96","first-page":"1","article-title":"Learning to discover cross-domain relations with generative adversarial networks","author":"kim","year":"2017","journal-title":"Proc ICLR"},{"key":"ref97","first-page":"406","article-title":"Pose guided person image generation","author":"ma","year":"2017","journal-title":"Proc NIPS"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2913545"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2018.8451694"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2896489"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803253"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/VCIP.2018.8698616"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2016.7539036"},{"key":"ref118","article-title":"Stochastic adversarial video prediction","author":"lee","year":"2018","journal-title":"arXiv 1804 01523"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2015.2500034"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46478-7_51"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2018.2873844"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01225"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2502552"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2599174"},{"key":"ref119","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00165"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/JSAC.2019.2916488"},{"key":"ref83","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Proc NIPS"},{"key":"ref114","article-title":"Stochastic variational video prediction","author":"babaeizadeh","year":"2017","journal-title":"arXiv 1710 11252"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.308"},{"key":"ref116","first-page":"91","article-title":"Visual dynamics: Probabilistic future frame synthesis via cross convolutional networks","author":"xue","year":"2016","journal-title":"Proc NIPS"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"ref115","first-page":"1","article-title":"Stochastic video generation with a learned prior","author":"denton","year":"2018","journal-title":"Proc ICML"},{"key":"ref120","article-title":"Decomposing motion and content for natural video sequence prediction","author":"villegas","year":"2017","journal-title":"arXiv 1706 08033"},{"key":"ref89","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Proc NIPS"},{"key":"ref121","first-page":"2863","article-title":"Action-conditional video prediction using deep networks in Atari games","author":"oh","year":"2015","journal-title":"Proc NIPS"},{"key":"ref122","first-page":"1","article-title":"Learning to generate long-term future via hierarchical prediction","author":"villegas","year":"2017","journal-title":"Proc ICML"},{"key":"ref123","article-title":"Hierarchical long-term video prediction without supervision","author":"wichers","year":"2018","journal-title":"arXiv 1806 04768"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.590"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2799968"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2818328"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2964320"}],"container-title":["IEEE Transactions on Image Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/83\/8835130\/09180095.pdf?arnumber=9180095","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T14:39:11Z","timestamp":1651070351000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9180095\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":153,"URL":"https:\/\/doi.org\/10.1109\/tip.2020.3016485","relation":{},"ISSN":["1057-7149","1941-0042"],"issn-type":[{"type":"print","value":"1057-7149"},{"type":"electronic","value":"1941-0042"}],"subject":[],"published":{"date-parts":[[2020]]}}}