{"id":"https://openalex.org/W4379933551","doi":"https://doi.org/10.48550/arxiv.2306.04216","title":"MMSum: A Dataset for Multimodal Summarization and Thumbnail Generation of Videos","display_name":"MMSum: A Dataset for Multimodal Summarization and Thumbnail Generation of Videos","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4379933551","doi":"https://doi.org/10.48550/arxiv.2306.04216"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2306.04216","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2306.04216","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058194829","display_name":"Jielin Qiu","orcid":"https://orcid.org/0000-0002-7384-1324"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Jielin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101186958","display_name":"Jiacheng Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jiacheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066352315","display_name":"William Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, William","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102521321","display_name":"Aditesh Kumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Aditesh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110988378","display_name":"Karthik Mittal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mittal, Karthik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108988088","display_name":"Claire Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Claire","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050209478","display_name":"Zhengyuan Yang","orcid":"https://orcid.org/0000-0002-5808-0889"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Zhengyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100657555","display_name":"Linjie Li","orcid":"https://orcid.org/0000-0003-0867-8863"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Linjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100358523","display_name":"Jianfeng Wang","orcid":"https://orcid.org/0000-0003-0932-3060"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jianfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374360","display_name":"Bo Li","orcid":"https://orcid.org/0000-0001-6709-0942"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037644321","display_name":"Ding Zhao","orcid":"https://orcid.org/0000-0002-9400-8446"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Ding","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100436501","display_name":"Lijuan Wang","orcid":"https://orcid.org/0000-0002-2517-2728"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Lijuan","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9965,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9965,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9951,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9946,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6186135},{"id":"https://openalex.org/keywords/thumbnail","display_name":"Thumbnail","score":0.55725455},{"id":"https://openalex.org/keywords/multi-document-summarization","display_name":"Multi-document summarization","score":0.4716579}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.95302325},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83721924},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.7117753},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6186135},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5758312},{"id":"https://openalex.org/C160174412","wikidata":"https://www.wikidata.org/wiki/Q873806","display_name":"Thumbnail","level":3,"score":0.55725455},{"id":"https://openalex.org/C134714966","wikidata":"https://www.wikidata.org/wiki/Q6934448","display_name":"Multi-document summarization","level":3,"score":0.4716579},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30479616},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2130081},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2306.04216","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2306.04216","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2306.04216","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.46}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3164984162","https://openalex.org/W2902627734","https://openalex.org/W2568827738","https://openalex.org/W2474342320","https://openalex.org/W2365100044","https://openalex.org/W2173208124","https://openalex.org/W2112885393","https://openalex.org/W2104677027","https://openalex.org/W2099859325","https://openalex.org/W1990695371"],"abstract_inverted_index":{"Multimodal":[0],"summarization":[1],"with":[2],"multimodal":[3,79],"output":[4],"(MSMO)":[5],"has":[6],"emerged":[7],"as":[8,140],"a":[9,45,96],"promising":[10],"research":[11],"direction.":[12],"Nonetheless,":[13],"numerous":[14],"limitations":[15],"exist":[16],"within":[17],"existing":[18],"public":[19],"MSMO":[20],"datasets,":[21],"including":[22,116],"insufficient":[23],"maintenance,":[24],"data":[25,137],"inaccessibility,":[26],"limited":[27],"size,":[28],"and":[29,43,69,76,83,91,114,121,127,135,146],"the":[30,56,107,132,136],"absence":[31],"of":[32,99],"proper":[33],"categorization,":[34,86],"which":[35],"pose":[36],"significant":[37],"challenges.":[38],"To":[39,124],"address":[40],"these":[41],"challenges":[42],"provide":[44],"comprehensive":[46],"dataset":[47,61,109,134],"for":[48,66,78],"this":[49],"new":[50,60],"direction,":[51],"we":[52,129],"have":[53],"meticulously":[54,84],"curated":[55],"\\textbf{MMSum}":[57,133],"dataset.":[58],"Our":[59,150],"features":[62],"(1)":[63],"Human-validated":[64],"summaries":[65],"both":[67],"video":[68],"textual":[70],"content,":[71],"providing":[72],"superior":[73],"human":[74],"instruction":[75],"labels":[77],"learning.":[80],"(2)":[81],"Comprehensively":[82],"arranged":[85],"spanning":[87],"17":[88],"principal":[89],"categories":[90],"170":[92],"subcategories":[93],"to":[94,110],"encapsulate":[95],"diverse":[97],"array":[98],"real-world":[100],"scenarios.":[101],"(3)":[102],"Benchmark":[103],"tests":[104],"performed":[105],"on":[106],"proposed":[108],"assess":[111],"various":[112],"tasks":[113],"methods,":[115],"\\textit{video":[117],"summarization},":[118,120],"\\textit{text":[119],"\\textit{multimodal":[122],"summarization}.":[123],"champion":[125],"accessibility":[126],"collaboration,":[128],"will":[130],"release":[131],"collection":[138],"tool":[139],"fully":[141],"open-source":[142],"resources,":[143],"fostering":[144],"transparency":[145],"accelerating":[147],"future":[148],"developments.":[149],"project":[151],"website":[152],"can":[153],"be":[154],"found":[155],"at~\\url{https://mmsum-dataset.github.io/}":[156]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4379933551","counts_by_year":[],"updated_date":"2025-04-14T08:47:21.085979","created_date":"2023-06-09"}