{"id":"https://openalex.org/W3175052625","doi":"https://doi.org/10.1109/cvpr46437.2021.01243","title":"SSAN: Separable Self-Attention Network for Video Representation Learning","display_name":"SSAN: Separable Self-Attention Network for Video Representation Learning","publication_year":2021,"publication_date":"2021-06-01","ids":{"openalex":"https://openalex.org/W3175052625","doi":"https://doi.org/10.1109/cvpr46437.2021.01243","mag":"3175052625"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr46437.2021.01243","pdf_url":null,"source":{"id":"https://openalex.org/S4363607701","display_name":"2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2105.13033","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053676942","display_name":"Xudong Guo","orcid":"https://orcid.org/0000-0002-9607-2679"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xudong Guo","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100895647","display_name":"Xun Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xun Guo","raw_affiliation_strings":["Microsoft Research Asia"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100756584","display_name":"Yan Lu","orcid":"https://orcid.org/0000-0001-5383-6424"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Lu","raw_affiliation_strings":["Microsoft Research Asia"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.477,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.99987,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":95},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9874,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature Learning","score":0.6078423},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.5559855}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8162321},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.62400806},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.6078423},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5559855},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.534758},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5238183},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42878667},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32062644},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr46437.2021.01243","pdf_url":null,"source":{"id":"https://openalex.org/S4363607701","display_name":"2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2105.13033","pdf_url":"https://arxiv.org/pdf/2105.13033","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2105.13033","pdf_url":"https://arxiv.org/pdf/2105.13033","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Quality education","score":0.41,"id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":51,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1527575280","https://openalex.org/W1923404803","https://openalex.org/W1957706851","https://openalex.org/W2016053056","https://openalex.org/W2136853139","https://openalex.org/W237546731","https://openalex.org/W2507009361","https://openalex.org/W2526286384","https://openalex.org/W2531409750","https://openalex.org/W2549139847","https://openalex.org/W2553594924","https://openalex.org/W2565656701","https://openalex.org/W2622263826","https://openalex.org/W2625366777","https://openalex.org/W2750526644","https://openalex.org/W2770804203","https://openalex.org/W2773514261","https://openalex.org/W2799261915","https://openalex.org/W2885775891","https://openalex.org/W2894608918","https://openalex.org/W2896457183","https://openalex.org/W2948048211","https://openalex.org/W2955058313","https://openalex.org/W2963091558","https://openalex.org/W2963125010","https://openalex.org/W2963155035","https://openalex.org/W2963321359","https://openalex.org/W2963341956","https://openalex.org/W2963351113","https://openalex.org/W2963403868","https://openalex.org/W2963524571","https://openalex.org/W2963563276","https://openalex.org/W2963820951","https://openalex.org/W2964080601","https://openalex.org/W2969876226","https://openalex.org/W2975357369","https://openalex.org/W2981304952","https://openalex.org/W2981413347","https://openalex.org/W2981851019","https://openalex.org/W2981899103","https://openalex.org/W2984008963","https://openalex.org/W2984287396","https://openalex.org/W2989728968","https://openalex.org/W2990152177","https://openalex.org/W2990503944","https://openalex.org/W2995460200","https://openalex.org/W3035635319","https://openalex.org/W3107634219","https://openalex.org/W4247541366","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4372354731","https://openalex.org/W4360593462","https://openalex.org/W2949152769","https://openalex.org/W2942366970","https://openalex.org/W2793211469","https://openalex.org/W2597588799","https://openalex.org/W2562400057","https://openalex.org/W2487162673","https://openalex.org/W2194570607","https://openalex.org/W1692008701"],"abstract_inverted_index":{"Self-attention":[0],"has":[1],"been":[2],"successfully":[3],"applied":[4],"to":[5,10],"video":[6,102,109,147,154,164],"representation":[7,103],"learning":[8,51,138],"due":[9],"the":[11,21,26,106,136,151,171],"effectiveness":[12],"of":[13,45,108,140,146,153],"modeling":[14],"long":[15],"range":[16],"dependencies.":[17],"Existing":[18],"approaches":[19],"build":[20,96],"dependencies":[22],"merely":[23],"by":[24,167],"computing":[25],"pairwise":[27],"correlations":[28,37,40,75],"along":[29],"spatial":[30,36,52,72,79],"and":[31,38,47,73,119,130,156,161],"temporal":[32,39,48,58,74,86],"dimensions":[33],"simultaneously.":[34],"However,":[35],"represent":[41],"different":[42],"contextual":[43,53],"information":[44,54],"scenes":[46],"reasoning.":[49],"Intuitively,":[50],"first":[55],"will":[56],"benefit":[57],"modeling.":[59,87],"In":[60],"this":[61],"paper,":[62],"we":[63,95],"propose":[64],"a":[65,97],"separable":[66],"self-attention":[67],"(SSA)":[68],"module,":[69],"which":[70,149],"models":[71,123],"sequentially,":[76],"so":[77],"that":[78],"contexts":[80],"can":[81],"be":[82],"efficiently":[83],"used":[84],"in":[85,143],"By":[88],"adding":[89],"SSA":[90,98,168],"module":[91],"into":[92],"2D":[93],"CNN,":[94],"network":[99,129],"(SSAN)":[100],"for":[101],"learning.":[104],"On":[105,159],"task":[107,145],"action":[110],"recognition,":[111],"our":[112,141],"approach":[113],"outperforms":[114],"state-of-the-art":[115,172],"methods":[116],"on":[117],"Something-Something":[118],"Kinetics-400":[120],"datasets.":[121],"Our":[122],"often":[124],"outperform":[125],"counterparts":[126],"with":[127],"shallower":[128],"fewer":[131],"modalities.":[132],"We":[133],"further":[134],"verify":[135],"semantic":[137],"ability":[139],"method":[142],"visual-language":[144],"retrieval,":[148],"showcases":[150],"homogeneity":[152],"representations":[155,165],"text":[157],"embeddings.":[158],"MSR-VTT":[160],"Youcook2":[162],"datasets,":[163],"learnt":[166],"significantly":[169],"improve":[170],"performance.":[173]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3175052625","counts_by_year":[{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":2}],"updated_date":"2024-12-14T11:42:52.558505","created_date":"2021-07-05"}