{"id":"https://openalex.org/W4391124692","doi":"https://doi.org/10.48550/arxiv.2401.10761","title":"NN-VVC: Versatile Video Coding boosted by self-supervisedly learned image coding for machines","display_name":"NN-VVC: Versatile Video Coding boosted by self-supervisedly learned image coding for machines","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4391124692","doi":"https://doi.org/10.48550/arxiv.2401.10761"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2401.10761","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2401.10761","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028336376","display_name":"Jukka Ahonen","orcid":"https://orcid.org/0000-0003-1683-4475"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahonen, Jukka I.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100768610","display_name":"Nam Le","orcid":"https://orcid.org/0000-0002-9722-3790"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Le, Nam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100671939","display_name":"Honglei Zhang","orcid":"https://orcid.org/0000-0002-8229-852X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Honglei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059063295","display_name":"Antti Hallapuro","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hallapuro, Antti","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049106121","display_name":"Francesco Cricri","orcid":"https://orcid.org/0000-0002-1521-420X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cricri, Francesco","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002546998","display_name":"Hamed R. Tavakoli","orcid":"https://orcid.org/0000-0002-9466-9148"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tavakoli, Hamed Rezazadegan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044816664","display_name":"Miska M. Hannuksela","orcid":"https://orcid.org/0000-0003-3405-0850"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hannuksela, Miska M.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5088180438","display_name":"Esa Rahtu","orcid":"https://orcid.org/0000-0001-8767-0864"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rahtu, Esa","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.8955827}],"concepts":[{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.8955827},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8312454},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5562148},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.54506105},{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.49340764},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.4909168},{"id":"https://openalex.org/C23431618","wikidata":"https://www.wikidata.org/wiki/Q1404672","display_name":"Multiview Video Coding","level":4,"score":0.4403823},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.43445802},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.40760443},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.37664014},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2686631},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.247845},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.067174315},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2401.10761","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2401.10761","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2401.10761","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4387445182","https://openalex.org/W4323657011","https://openalex.org/W4249049174","https://openalex.org/W2595406057","https://openalex.org/W2379517759","https://openalex.org/W2160581253","https://openalex.org/W2051701434","https://openalex.org/W2039662546","https://openalex.org/W2035981816","https://openalex.org/W2029405403"],"abstract_inverted_index":{"The":[0,61,91],"recent":[1],"progress":[2],"in":[3,56,143,172],"artificial":[4],"intelligence":[5],"has":[6,53],"led":[7],"to":[8,168,189],"an":[9,161],"ever-increasing":[10],"usage":[11],"of":[12,28,40,160,218],"images":[13],"and":[14,26,47,88,112,125,128,165,175,191,201,211,238],"videos":[15],"by":[16,65],"machine":[17,51,116,132,212,240],"analysis":[18,52],"algorithms,":[19],"mainly":[20,55],"neural":[21],"networks.":[22],"Nonetheless,":[23],"compression,":[24],"storage":[25],"transmission":[27],"media":[29],"have":[30],"traditionally":[31],"been":[32],"designed":[33],"considering":[34],"human":[35],"beings":[36],"as":[37],"the":[38,41,96,119,131,139,158,184,216,223],"viewers":[39],"content.":[42],"Recent":[43],"research":[44,225],"on":[45,75,81,207,235],"image":[46,76,163,174,200],"video":[48,86,106,176,202,230],"coding":[49,177],"for":[50,152,178,199],"progressed":[54],"two":[57],"almost":[58],"orthogonal":[59],"directions.":[60],"first":[62,224],"is":[63,135,222],"represented":[64],"end-to-end":[66],"(E2E)":[67],"learned":[68],"codecs":[69,87],"which,":[70],"while":[71],"offering":[72],"high":[73,170],"performance":[74,134,171],"coding,":[77],"are":[78],"not":[79],"yet":[80],"par":[82],"with":[83,110],"state-of-the-art":[84],"conventional":[85,105],"lack":[89],"interoperability.":[90],"second":[92],"direction":[93],"considers":[94],"using":[95],"Versatile":[97],"Video":[98],"Coding":[99],"(VVC)":[100],"standard":[101],"or":[102],"any":[103],"other":[104],"codec":[107,151,164,231],"(CVC)":[108],"together":[109],"pre-":[111],"post-processing":[113],"operations":[114],"targeting":[115],"analysis.":[117],"While":[118],"CVC-based":[120],"methods":[121],"benefit":[122],"from":[123],"interoperability":[124],"broad":[126],"hardware":[127],"software":[129],"support,":[130],"task":[133],"often":[136],"lower":[137],"than":[138],"desired":[140],"level,":[141],"particularly":[142],"low":[144],"bitrates.":[145],"This":[146],"paper":[147,226],"proposes":[148],"a":[149,166,228],"hybrid":[150,229],"machines":[153],"called":[154],"NN-VVC,":[155],"which":[156],"combines":[157],"advantages":[159],"E2E-learned":[162],"CVC":[167],"achieve":[169],"both":[173],"machines.":[179],"Our":[180],"experiments":[181],"show":[182],"that":[183,232],"proposed":[185],"system":[186],"achieved":[187],"up":[188],"-43.20%":[190],"-26.8%":[192],"Bj{\\o}ntegaard":[193],"Delta":[194],"rate":[195],"reduction":[196],"over":[197],"VVC":[198,234],"data,":[203],"respectively,":[204],"when":[205],"evaluated":[206],"multiple":[208,236,239],"different":[209],"datasets":[210,237],"vision":[213,241],"tasks.":[214,242],"To":[215],"best":[217],"our":[219],"knowledge,":[220],"this":[221],"showing":[227],"outperforms":[233]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4391124692","counts_by_year":[],"updated_date":"2025-04-22T20:39:21.922180","created_date":"2024-01-23"}