{"id":"https://openalex.org/W3205519684","doi":"https://doi.org/10.1109/iccv48922.2021.00804","title":"Attention is not Enough: Mitigating the Distribution Discrepancy in Asynchronous Multimodal Sequence Fusion","display_name":"Attention is not Enough: Mitigating the Distribution Discrepancy in Asynchronous Multimodal Sequence Fusion","publication_year":2021,"publication_date":"2021-10-01","ids":{"openalex":"https://openalex.org/W3205519684","doi":"https://doi.org/10.1109/iccv48922.2021.00804","mag":"3205519684"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv48922.2021.00804","pdf_url":null,"source":{"id":"https://openalex.org/S4363607764","display_name":"2021 IEEE/CVF International Conference on Computer Vision (ICCV)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045622740","display_name":"Tao Liang","orcid":"https://orcid.org/0000-0003-2567-6202"},"institutions":[{"id":"https://openalex.org/I4210126929","display_name":"CE Technologies (United Kingdom)","ror":"https://ror.org/02pw67e25","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210126929"]},{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Tao Liang","raw_affiliation_strings":["Engineering Productivity & Quality Assurance of IES, Bytedance","Southwest Jiaotong University"],"affiliations":[{"raw_affiliation_string":"Engineering Productivity & Quality Assurance of IES, Bytedance","institution_ids":["https://openalex.org/I4210126929"]},{"raw_affiliation_string":"Southwest Jiaotong University","institution_ids":["https://openalex.org/I4800084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029912845","display_name":"Guosheng Lin","orcid":"https://orcid.org/0000-0002-0329-7458"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Guosheng Lin","raw_affiliation_strings":["Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100682348","display_name":"Lei Feng","orcid":"https://orcid.org/0000-0003-2839-5799"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Lei Feng","raw_affiliation_strings":["Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100456377","display_name":"Yan Zhang","orcid":"https://orcid.org/0000-0003-1585-0801"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Zhang","raw_affiliation_strings":["University of Electronic Science and Technology of China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009594218","display_name":"Fengmao Lv","orcid":"https://orcid.org/0000-0003-1640-0992"},"institutions":[{"id":"https://openalex.org/I204831749","display_name":"Southwestern University of Finance and Economics","ror":"https://ror.org/04ewct822","country_code":"CN","type":"education","lineage":["https://openalex.org/I204831749"]},{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fengmao Lv","raw_affiliation_strings":["Center of Statistical Research, Southwestern University of Finance and Economics","Southwest Jiaotong University"],"affiliations":[{"raw_affiliation_string":"Center of Statistical Research, Southwestern University of Finance and Economics","institution_ids":["https://openalex.org/I204831749"]},{"raw_affiliation_string":"Southwest Jiaotong University","institution_ids":["https://openalex.org/I4800084"]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.559,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.756396,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"8128","last_page":"8136"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9987,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9987,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9975,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/crossmodal","display_name":"Crossmodal","score":0.7893226},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.7536753},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.65821844},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.5445235}],"concepts":[{"id":"https://openalex.org/C60115397","wikidata":"https://www.wikidata.org/wiki/Q5188732","display_name":"Crossmodal","level":4,"score":0.7893226},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.7536753},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.721201},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.65821844},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.5580008},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5452256},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.5445235},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43904543},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.29470396},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.15577039},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv48922.2021.00804","pdf_url":null,"source":{"id":"https://openalex.org/S4363607764","display_name":"2021 IEEE/CVF International Conference on Computer Vision (ICCV)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality education","score":0.61,"id":"https://metadata.un.org/sdg/4"}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":null},{"funder":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities","award_id":null}],"datasets":[],"versions":[],"referenced_works_count":32,"referenced_works":["https://openalex.org/W1882958252","https://openalex.org/W2095176743","https://openalex.org/W2105020570","https://openalex.org/W2115403315","https://openalex.org/W2127141656","https://openalex.org/W2146334809","https://openalex.org/W2159291411","https://openalex.org/W2184188583","https://openalex.org/W2250539671","https://openalex.org/W2395639500","https://openalex.org/W2470957930","https://openalex.org/W2556418146","https://openalex.org/W2593768305","https://openalex.org/W2612041314","https://openalex.org/W2777446440","https://openalex.org/W2808359495","https://openalex.org/W2883409523","https://openalex.org/W2883430806","https://openalex.org/W2892946488","https://openalex.org/W2895281799","https://openalex.org/W2946170651","https://openalex.org/W2949813473","https://openalex.org/W2951670162","https://openalex.org/W2962931510","https://openalex.org/W2963403868","https://openalex.org/W2963826681","https://openalex.org/W2964051877","https://openalex.org/W2964216663","https://openalex.org/W2964266095","https://openalex.org/W2985406498","https://openalex.org/W3169801598","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4320153225","https://openalex.org/W4313547873","https://openalex.org/W4293261942","https://openalex.org/W4224266612","https://openalex.org/W3125968744","https://openalex.org/W2383394264","https://openalex.org/W2110287964","https://openalex.org/W2041257890","https://openalex.org/W203959209"],"abstract_inverted_index":{"Videos":[0],"flow":[1],"as":[2],"the":[3,27,42,59,73,77,83,89,97,107,121,136,149,153,161,165,187],"mixture":[4],"of":[5,20,76,92,164,181,189],"language,":[6],"acoustic,":[7],"and":[8,152,167],"vision":[9],"modalities.":[10,65,175],"A":[11],"thorough":[12],"video":[13,183],"understanding":[14,184],"needs":[15],"to":[16,26,57,81],"fuse":[17],"time-series":[18],"data":[19],"different":[21,64,101,140,174],"modalities":[22,141],"for":[23,31],"prediction.":[24],"Due":[25],"variable":[28],"receiving":[29],"frequency":[30],"sequences":[32],"from":[33,51,63,96,173],"each":[34],"modality,":[35],"there":[36],"usually":[37],"exists":[38],"inherent":[39],"asynchrony":[40],"across":[41,100],"collected":[43],"multimodal":[44,49,53,182],"streams.":[45],"Towards":[46],"an":[47],"efficient":[48],"fusion":[50],"asynchronous":[52],"streams,":[54],"we":[55],"need":[56],"model":[58],"correlations":[60,157],"between":[61,86,139],"elements":[62,154],"The":[66],"recent":[67],"Multimodal":[68],"Transformer":[69,79],"(MulT)":[70],"approach":[71,126],"extends":[72],"self-attention":[74,93],"mechanism":[75],"original":[78],"network":[80],"learn":[82],"crossmodal":[84,109,129],"dependencies":[85,110],"elements.":[87],"However,":[88],"direct":[90],"replication":[91],"will":[94],"suffer":[95],"distribution":[98,137,151],"mismatch":[99,138],"modality":[102],"features.":[103],"As":[104],"a":[105],"result,":[106],"learnt":[108],"can":[111],"be":[112],"unreliable.":[113],"Motivated":[114],"by":[115],"this":[116,118,146],"observation,":[117],"work":[119],"proposes":[120],"Modality-Invariant":[122],"Crossmodal":[123],"Attention":[124],"(MICA)":[125],"towards":[127],"learning":[128],"interactions":[130],"over":[131,160],"modality-invariant":[132],"space":[133,163],"in":[134],"which":[135,170],"is":[142],"well":[143],"bridged.":[144],"To":[145],"end,":[147],"both":[148],"marginal":[150],"with":[155],"high-confidence":[156],"are":[158,171],"aligned":[159],"common":[162],"query":[166],"key":[168],"vectors":[169],"computed":[172],"Experiments":[176],"on":[177],"three":[178],"standard":[179],"benchmarks":[180],"clearly":[185],"validate":[186],"superiority":[188],"our":[190],"approach.":[191]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3205519684","counts_by_year":[{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":6}],"updated_date":"2025-01-02T12:01:05.619647","created_date":"2021-10-25"}