{"id":"https://openalex.org/W4377864723","doi":"https://doi.org/10.48550/arxiv.2305.12767","title":"D$^2$TV: Dual Knowledge Distillation and Target-oriented Vision Modeling for Many-to-Many Multimodal Summarization","display_name":"D$^2$TV: Dual Knowledge Distillation and Target-oriented Vision Modeling for Many-to-Many Multimodal Summarization","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4377864723","doi":"https://doi.org/10.48550/arxiv.2305.12767"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.12767","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2305.12767","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102206923","display_name":"Yunlong Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yunlong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024849044","display_name":"Fandong Meng","orcid":"https://orcid.org/0000-0002-8158-2377"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Fandong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062115445","display_name":"Jiaan Wang","orcid":"https://orcid.org/0000-0002-2587-7648"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiaan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101698034","display_name":"Jinan Xu","orcid":"https://orcid.org/0000-0003-0170-626X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jinan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100394297","display_name":"Yufeng Chen","orcid":"https://orcid.org/0000-0003-0437-6788"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yufeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100620306","display_name":"Jie Zhou","orcid":"https://orcid.org/0000-0001-7701-234X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jie","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9958,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9922,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.967924},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83562404},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6701708},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.64707834},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5924826},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5578686},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5227711},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.521211},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39788243},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.38855165},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.06768018},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.12767","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.12767","pdf_url":"http://arxiv.org/pdf/2305.12767","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2305.12767","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.12767","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.65,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4389760904","https://openalex.org/W4323520239","https://openalex.org/W4306886878","https://openalex.org/W4242223894","https://openalex.org/W3148229873","https://openalex.org/W2366403280","https://openalex.org/W2150160875","https://openalex.org/W2091301346","https://openalex.org/W1517524280","https://openalex.org/W1495108544"],"abstract_inverted_index":{"Many-to-many":[0],"multimodal":[1,26,31,198],"summarization":[2,28,33,199],"(M$^3$S)":[3],"task":[4],"aims":[5],"to":[6,42,59,72,148],"generate":[7],"summaries":[8],"in":[9,15,52],"any":[10,16],"language":[11,17],"with":[12,94],"document":[13],"inputs":[14],"and":[18,30,47,110,122,143,151,172],"the":[19,60,80,128,132,139,182,186,189],"corresponding":[20],"image":[21],"sequence,":[22],"which":[23],"essentially":[24],"comprises":[25],"monolingual":[27],"(MMS)":[29],"cross-lingual":[32],"(MXLS)":[34],"tasks.":[35],"Although":[36],"much":[37],"work":[38],"has":[39,48],"been":[40],"devoted":[41],"either":[43],"MMS":[44,71,83,87,142],"or":[45,84,97],"MXLS":[46,74,144],"obtained":[49],"increasing":[50],"attention":[51,58],"recent":[53],"years,":[54],"little":[55],"research":[56],"pays":[57],"M$^3$S":[61,129],"task.":[62,130],"Besides,":[63],"existing":[64],"studies":[65],"mainly":[66],"focus":[67],"on":[68,181],"1)":[69],"utilizing":[70],"enhance":[73],"via":[75],"knowledge":[76,120,134,140],"distillation":[77,121,135],"without":[78],"considering":[79],"performance":[81],"of":[82,141,156,188],"2)":[85],"improving":[86],"models":[88],"by":[89],"filtering":[90],"summary-unrelated":[91],"visual":[92,161,177],"features":[93],"implicit":[95],"learning":[96],"explicitly":[98],"complex":[99],"training":[100],"objectives.":[101],"In":[102],"this":[103],"paper,":[104],"we":[105,116,193],"first":[106],"introduce":[107],"a":[108,118,163,196],"general":[109],"practical":[111],"task,":[112],"i.e.,":[113],"M$^3$S.":[114],"Further,":[115],"propose":[117],"dual":[119,133],"target-oriented":[123,160,167],"vision":[124],"modeling":[125],"framework":[126],"for":[127,174],"Specifically,":[131],"method":[136],"guarantees":[137],"that":[138],"can":[145],"be":[146],"transferred":[147],"each":[149],"other":[150],"thus":[152],"mutually":[153],"prompt":[154],"both":[155],"them.":[157],"To":[158],"offer":[159],"features,":[162],"simple":[164],"yet":[165],"effective":[166],"contrastive":[168],"objective":[169],"is":[170],"designed":[171],"responsible":[173],"discarding":[175],"needless":[176],"information.":[178],"Extensive":[179],"experiments":[180],"many-to-many":[183,197],"setting":[184],"show":[185],"effectiveness":[187],"proposed":[190],"approach.":[191],"Additionally,":[192],"will":[194],"contribute":[195],"(M$^3$Sum)":[200],"dataset.":[201]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4377864723","counts_by_year":[],"updated_date":"2025-04-11T02:46:33.674853","created_date":"2023-05-24"}