{"id":"https://openalex.org/W4403572874","doi":"https://doi.org/10.48550/arxiv.2410.10818","title":"TemporalBench: Benchmarking Fine-grained Temporal Understanding for\n Multimodal Video Models","display_name":"TemporalBench: Benchmarking Fine-grained Temporal Understanding for\n Multimodal Video Models","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4403572874","doi":"https://doi.org/10.48550/arxiv.2410.10818"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.10818","pdf_url":"http://arxiv.org/pdf/2410.10818","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.10818","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081591501","display_name":"Mu Cai","orcid":"https://orcid.org/0009-0008-7967-9752"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Mu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045400343","display_name":"Rong Tan","orcid":"https://orcid.org/0000-0002-8705-0099"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Reuben","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101666328","display_name":"Jianrui Zhang","orcid":"https://orcid.org/0000-0003-0666-0811"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jianrui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101528814","display_name":"Bocheng Zou","orcid":"https://orcid.org/0009-0001-5964-9141"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Bocheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101464999","display_name":"Kai Zhang","orcid":"https://orcid.org/0000-0002-5142-0182"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100744373","display_name":"Feng Yao","orcid":"https://orcid.org/0000-0002-7321-0514"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Feng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075229802","display_name":"Fangrui Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Fangrui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035406435","display_name":"Jing Gu","orcid":"https://orcid.org/0000-0002-8870-3338"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048972245","display_name":"Yunfang Zhong","orcid":"https://orcid.org/0000-0003-1618-1662"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Yiwu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066394654","display_name":"Yue Shang","orcid":"https://orcid.org/0000-0002-8576-4338"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shang, Yuzhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090791245","display_name":"Yaling Dou","orcid":"https://orcid.org/0000-0001-5240-2384"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dou, Yao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107280258","display_name":"J. Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Jaden","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047233371","display_name":"Jianfeng Gao","orcid":"https://orcid.org/0000-0002-6371-505X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Jianfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112702739","display_name":"Yong Jae Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Yong Jae","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100632854","display_name":"Jianwei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jianwei","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9809,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9809,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9757,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9605,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.79463035}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.79463035},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5999382},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.09295514},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.10818","pdf_url":"http://arxiv.org/pdf/2410.10818","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.10818","pdf_url":"http://arxiv.org/pdf/2410.10818","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W435179959","https://openalex.org/W4238897586","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2619091065","https://openalex.org/W2291782699","https://openalex.org/W2284465472","https://openalex.org/W2059640416","https://openalex.org/W1490753184"],"abstract_inverted_index":{"Understanding":[0],"fine-grained":[1,17,49],"temporal":[2,18,35,50,69,86,160,213],"dynamics":[3,70],"is":[4],"crucial":[5],"for":[6,34,83,168,189],"multimodal":[7,127],"video":[8,21,58,72,109,118,128],"comprehension":[9],"and":[10,28,88,112,116,131,157,181,218],"generation.":[11],"Due":[12],"to":[13,47,199],"the":[14,68,175],"lack":[15],"of":[16,56],"annotations,":[19],"existing":[20],"benchmarks":[22,27],"mostly":[23],"resemble":[24],"static":[25],"image":[26],"are":[29],"incompetent":[30],"at":[31],"evaluating":[32,48,84],"models":[33,124,130,139],"understanding.":[36,161],"In":[37],"this":[38],"paper,":[39],"we":[40,163,193],"introduce":[41],"TemporalBench,":[42,149],"a":[43,75,80,151,165,183,187],"new":[44],"benchmark":[45,78],"dedicated":[46],"understanding":[51,87],"in":[52,71,159,178],"videos.":[53],"TemporalBench":[54,206],"consists":[55],"~10K":[57],"question-answer":[59],"pairs,":[60],"derived":[61],"from":[62],"~2K":[63],"high-quality":[64],"human":[65],"annotations":[66],"detailing":[67],"clips.":[73],"As":[74],"result,":[76],"our":[77],"provides":[79],"unique":[81],"testbed":[82],"various":[85,105],"reasoning":[89,214],"abilities":[90],"such":[91,125,201],"as":[92,120,122,126,186],"action":[93],"frequency,":[94],"motion":[95],"magnitude,":[96],"event":[97],"order,":[98],"etc.":[99],"Moreover,":[100],"it":[101],"enables":[102],"evaluations":[103],"on":[104,148,210],"tasks":[106],"like":[107,140],"both":[108,114],"question":[110,145],"answering":[111,146],"captioning,":[113],"short":[115],"long":[117],"understanding,":[119],"well":[121],"different":[123],"embedding":[129],"text":[132],"generation":[133],"models.":[134],"Results":[135],"show":[136],"that":[137,205],"state-of-the-art":[138],"GPT-4o":[141],"achieve":[142],"only":[143],"38.5%":[144],"accuracy":[147],"demonstrating":[150],"significant":[152],"gap":[153],"(~30%)":[154],"between":[155],"humans":[156],"AI":[158],"Furthermore,":[162],"notice":[164],"critical":[166],"pitfall":[167],"multi-choice":[169],"QA":[170],"where":[171,192],"LLMs":[172],"can":[173,207],"detect":[174],"subtle":[176],"changes":[177],"negative":[179],"captions":[180],"find":[182],"centralized":[184],"description":[185],"cue":[188],"its":[190],"prediction,":[191],"propose":[194],"Multiple":[195],"Binary":[196],"Accuracy":[197],"(MBA)":[198],"correct":[200],"bias.":[202],"We":[203],"hope":[204],"foster":[208],"research":[209],"improving":[211],"models'":[212],"capabilities.":[215],"Both":[216],"dataset":[217],"evaluation":[219],"code":[220],"will":[221],"be":[222],"made":[223],"available.":[224]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403572874","counts_by_year":[],"updated_date":"2025-04-22T07:19:44.886344","created_date":"2024-10-20"}