{"id":"https://openalex.org/W4226139655","doi":"https://doi.org/10.48550/arxiv.2203.01225","title":"Video Question Answering: Datasets, Algorithms and Challenges","display_name":"Video Question Answering: Datasets, Algorithms and Challenges","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4226139655","doi":"https://doi.org/10.48550/arxiv.2203.01225"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.01225","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2203.01225","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028920827","display_name":"Yaoyao Zhong","orcid":"https://orcid.org/0000-0002-2671-9350"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Yaoyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056318151","display_name":"Wei Ji","orcid":"https://orcid.org/0000-0003-4059-5902"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024669782","display_name":"Junbin Xiao","orcid":"https://orcid.org/0000-0001-5573-6195"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Junbin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101762861","display_name":"Yicong Li","orcid":"https://orcid.org/0000-0002-5659-793X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yicong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025452586","display_name":"Weihong Deng","orcid":"https://orcid.org/0000-0001-5952-6996"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Weihong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5089404640","display_name":"Tat\u2010Seng Chua","orcid":"https://orcid.org/0000-0001-6097-7807"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chua, Tat-Seng","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":59},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9945,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9927,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.81911767},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7812332},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.76089466},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7338173},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.5335985},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5104946},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.4540693},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.42030865},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40262285},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39714494},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36604273},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.12412834},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.01225","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2203.01225","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2203.01225","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Quality education","score":0.85,"id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4311804456","https://openalex.org/W2735662278","https://openalex.org/W2623658258","https://openalex.org/W2382615723","https://openalex.org/W2370459448","https://openalex.org/W2165912799","https://openalex.org/W2143413548","https://openalex.org/W2105067402","https://openalex.org/W1987484445","https://openalex.org/W1969219540"],"abstract_inverted_index":{"Video":[0],"Question":[1],"Answering":[2],"(VideoQA)":[3],"aims":[4],"to":[5,11,63,81,103],"answer":[6],"natural":[7],"language":[8,28],"questions":[9],"according":[10],"the":[12,85,95,107],"given":[13],"videos.":[14],"It":[15],"has":[16],"earned":[17],"increasing":[18],"attention":[19],"with":[20,32],"recent":[21],"research":[22,96],"trends":[23],"in":[24],"joint":[25],"vision":[26],"and":[27,38,48,78,88],"understanding.":[29],"Yet,":[30],"compared":[31],"ImageQA,":[33],"VideoQA":[34,53],"is":[35],"largely":[36],"underexplored":[37],"progresses":[39],"slowly.":[40],"Although":[41],"different":[42,52],"algorithms":[43],"have":[44],"continually":[45],"been":[46],"proposed":[47],"shown":[49],"success":[50],"on":[51,84],"datasets,":[54,86],"we":[55,113],"find":[56],"that":[57],"there":[58],"lacks":[59],"a":[60,75],"meaningful":[61],"survey":[62],"categorize":[64],"them,":[65],"which":[66],"seriously":[67],"impedes":[68],"its":[69],"advancements.":[70],"This":[71],"paper":[72],"thus":[73],"provides":[74],"clear":[76],"taxonomy":[77],"comprehensive":[79],"analyses":[80],"VideoQA,":[82],"focusing":[83],"algorithms,":[87],"unique":[89],"challenges.":[90],"We":[91],"then":[92],"point":[93],"out":[94],"trend":[97],"of":[98,109],"studying":[99],"beyond":[100],"factoid":[101],"QA":[102,105],"inference":[104],"towards":[106],"cognition":[108],"video":[110],"contents,":[111],"Finally,":[112],"conclude":[114],"some":[115],"promising":[116],"directions":[117],"for":[118],"future":[119],"exploration.":[120]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4226139655","counts_by_year":[],"updated_date":"2025-03-04T14:23:34.373036","created_date":"2022-05-05"}