{"id":"https://openalex.org/W4386108371","doi":"https://doi.org/10.1109/tmm.2023.3307933","title":"Learning Representations by Contrastive Spatio-Temporal Clustering for Skeleton-Based Action Recognition","display_name":"Learning Representations by Contrastive Spatio-Temporal Clustering for Skeleton-Based Action Recognition","publication_year":2023,"publication_date":"2023-08-23","ids":{"openalex":"https://openalex.org/W4386108371","doi":"https://doi.org/10.1109/tmm.2023.3307933"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3307933","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019924823","display_name":"Mingdao Wang","orcid":"https://orcid.org/0009-0004-2907-086X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingdao Wang","raw_affiliation_strings":["School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029828175","display_name":"Xueming Li","orcid":"https://orcid.org/0000-0003-1058-2799"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueming Li","raw_affiliation_strings":["School of Digital Media and Design Arts, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Digital Media and Design Arts, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067543837","display_name":"Siqi Chen","orcid":"https://orcid.org/0000-0001-6116-7080"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siqi Chen","raw_affiliation_strings":["School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045236530","display_name":"Xianlin Zhang","orcid":"https://orcid.org/0000-0003-3905-2062"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianlin Zhang","raw_affiliation_strings":["School of Digital Media and Design Arts, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Digital Media and Design Arts, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100565849","display_name":"Lei Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Lei Ma","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Alberta, Edmonton, AB, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100333738","display_name":"Yue Zhang","orcid":"https://orcid.org/0000-0002-6327-5023"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Zhang","raw_affiliation_strings":["School of Digital Media and Design Arts, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Digital Media and Design Arts, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.651,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.588886,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":80,"max":85},"biblio":{"volume":"26","issue":null,"first_page":"3207","last_page":"3220"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Action Recognition and Pose Estimation","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Action Recognition and Pose Estimation","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition for Human Identification","score":0.9941,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection in High-Dimensional Data","score":0.992,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6553388},{"id":"https://openalex.org/keywords/action-recognition","display_name":"Action Recognition","score":0.556102},{"id":"https://openalex.org/keywords/cross-view-recognition","display_name":"Cross-View Recognition","score":0.549264},{"id":"https://openalex.org/keywords/skeleton-based-recognition","display_name":"Skeleton-Based Recognition","score":0.542749},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.53024197},{"id":"https://openalex.org/keywords/spatiotemporal-features","display_name":"Spatiotemporal Features","score":0.530217},{"id":"https://openalex.org/keywords/silhouette-analysis","display_name":"Silhouette Analysis","score":0.511261},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.44131088}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7861823},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.67223483},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.65760946},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6553388},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.53024197},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48517567},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.44131088},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4128191}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3307933","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6,"display_name":"Reduced inequalities"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":50,"referenced_works":["https://openalex.org/W1821462560","https://openalex.org/W2051822550","https://openalex.org/W2058787788","https://openalex.org/W2089611415","https://openalex.org/W2187089797","https://openalex.org/W2739074143","https://openalex.org/W2765433083","https://openalex.org/W2785325870","https://openalex.org/W2787919227","https://openalex.org/W2884286326","https://openalex.org/W2944006115","https://openalex.org/W2956928039","https://openalex.org/W2963076818","https://openalex.org/W2964134613","https://openalex.org/W2970567068","https://openalex.org/W2981952041","https://openalex.org/W2994721977","https://openalex.org/W2997907976","https://openalex.org/W3018265077","https://openalex.org/W3019723613","https://openalex.org/W3034345981","https://openalex.org/W3034548564","https://openalex.org/W3034999503","https://openalex.org/W3035225512","https://openalex.org/W3035524453","https://openalex.org/W3103184573","https://openalex.org/W3105195350","https://openalex.org/W3106776857","https://openalex.org/W3111448578","https://openalex.org/W3156509901","https://openalex.org/W3169413442","https://openalex.org/W3172117784","https://openalex.org/W3203227473","https://openalex.org/W3205106480","https://openalex.org/W3212618454","https://openalex.org/W3216909972","https://openalex.org/W4200634815","https://openalex.org/W4226027695","https://openalex.org/W4282981352","https://openalex.org/W4284975785","https://openalex.org/W4288024349","https://openalex.org/W4297808394","https://openalex.org/W4312387119","https://openalex.org/W4312629998","https://openalex.org/W4312675926","https://openalex.org/W4312835938","https://openalex.org/W4312841534","https://openalex.org/W4313165677","https://openalex.org/W4313185874","https://openalex.org/W4319993407"],"related_works":["https://openalex.org/W4389116644","https://openalex.org/W4388405611","https://openalex.org/W3208297503","https://openalex.org/W3119773509","https://openalex.org/W3103844505","https://openalex.org/W2964117661","https://openalex.org/W2889153461","https://openalex.org/W2619127353","https://openalex.org/W259157601","https://openalex.org/W2153315159"],"abstract_inverted_index":{"Self-supervised":[0],"representation":[1],"learning":[2,157],"has":[3],"proven":[4],"constructive":[5],"for":[6,175],"skeleton-based":[7],"action":[8,68],"recognition.":[9],"For":[10],"better":[11],"performance,":[12],"existing":[13],"methods":[14,301],"mainly":[15],"focus":[16,303],"on":[17,138,249,295,304],"(1)":[18],"multi-modal":[19,271],"data":[20,82,305],"augmentations":[21,83,306],"and":[22,35,52,75,131,186,256,270,277,290,307],"(2)":[23],"triplet":[24,85,308],"contrastive":[25,86,309],"samples":[26,87,142,310],"construction.":[27,311],"However,":[28],"designing":[29],"these":[30,215],"strategies":[31],"is":[32,77,124,143],"always":[33],"heuristics":[34],"hard.":[36],"Instead":[37],"of":[38,67,141,149,158,178,298],"exploring":[39],"more":[40],"similar":[41],"strategies,":[42],"this":[43,46],"paper":[44],"addresses":[45],"issue":[47],"with":[48],"a":[49,54,63,117,132,163,201,230],"different":[50,176],"view":[51],"proposes":[53],"novel":[55],"Contrastive":[56,231],"Spatio-Temporal":[57,232],"Clustering":[58,166,233],"(CSTC)":[59],"module.":[60],"CSTC":[61,91,285],"constructs":[62],"supervised":[64],"signal":[65],"(pseudo-label)":[66],"sequences":[69],"in":[70,243,267,274],"an":[71,96],"online":[72],"clustering":[73,188,245],"manner,":[74],"it":[76,209],"complementary":[78],"to":[79,111,126,145,154,170,207],"the":[80,102,106,113,128,147,156,179,195,212,221,225,244,275,282,296],"recent":[81],"or":[84],"construction":[88],"strategies.":[89],"Specifically,":[90],"can":[92,237],"be":[93],"formulated":[94],"as":[95],"optimal":[97,108],"transport":[98,109],"problem.":[99],"we":[100,161,199,228],"introduce":[101],"spatio-temporal":[103,241],"regularizations":[104],"into":[105,224],"original":[107],"term":[110],"guide":[112],"pseudo-label":[114],"generation,":[115],"i.e.,":[116],"semantic":[118],"regularization":[119,136],"learned":[120,196],"by":[121,210],"frame":[122,129],"index":[123],"proposed":[125,144],"constrain":[127],"order,":[130],"prior":[133],"normal":[134],"distribution":[135],"based":[137],"sampling":[139],"characteristics":[140],"maintain":[146],"dependability":[148],"spatial":[150,185],"cluster":[151,172],"assignments.":[152],"Furthermore,":[153],"enhance":[155],"latent":[159],"features,":[160],"propose":[162,200,229],"Bidirectional":[164],"Cross-modal":[165],"Consistency":[167],"Objective":[168],"(B3CO)":[169],"enforce":[171],"assignments":[173],"consistency":[174],"modalities":[177],"same":[180],"sample.":[181],"Last,":[182],"since":[183],"fusing":[184],"temporal":[187],"losses":[189,217],"directly":[190],"during":[191],"back-propagation":[192],"will":[193],"confuse":[194],"dimension-specific":[197],"semantics,":[198],"simple":[202],"yet":[203],"effective":[204],"training":[205,211],"strategy":[206],"fix":[208],"model":[213],"using":[214],"two":[216],"alternately.":[218],"By":[219],"integrating":[220],"above":[222],"designs":[223],"MoCo":[226],"framework,":[227],"Network":[234],"(CSTCN),":[235],"which":[236,302],"excavate":[238],"cross-modal":[239],"discriminative":[240],"features":[242],"space.":[246],"Experimental":[247],"results":[248],"NTU":[250,253],"RGB+D":[251,254],"60,":[252],"120,":[255],"PKU-MMD":[257],"II":[258],"datasets":[259],"show":[260],"that":[261],"CSTCN":[262],"achieves":[263,291],"state":[264],"of-the-art":[265],"performance":[266,293],"both":[268],"single-":[269],"models,":[272],"especially":[273],"KNN":[276],"semi-supervised":[278],"evaluation":[279],"protocols.":[280],"Besides,":[281],"key":[283],"module":[284],"shows":[286],"good":[287],"generalization":[288],"capability,":[289],"consistent":[292],"improvement":[294],"basis":[297],"several":[299],"state-of-the-art":[300]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4386108371","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2024-12-01T00:46:50.597225","created_date":"2023-08-24"}