{"id":"https://openalex.org/W4389984378","doi":"https://doi.org/10.48550/arxiv.2312.11142","title":"Efficiency-oriented approaches for self-supervised speech representation learning","display_name":"Efficiency-oriented approaches for self-supervised speech representation learning","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4389984378","doi":"https://doi.org/10.48550/arxiv.2312.11142"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.11142","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2312.11142","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031248134","display_name":"Luis Lugo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lugo, Luis","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5019268455","display_name":"Valentin Vielzeuf","orcid":"https://orcid.org/0000-0002-2987-2510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vielzeuf, Valentin","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":66},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9961,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9961,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9844,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.5021274},{"id":"https://openalex.org/keywords/identification","display_name":"Identification","score":0.49385267},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature Learning","score":0.47642097},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised Learning","score":0.47516793}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.81605375},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.61839026},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.60694444},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5845635},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5021274},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.49385267},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.47642097},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.47516793},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.469064},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.464327},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.43398428},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32063514},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.11142","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2312.11142","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.11142","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.85,"display_name":"Affordable and clean energy"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W96612179","https://openalex.org/W632915154","https://openalex.org/W4256492088","https://openalex.org/W4229499248","https://openalex.org/W3048601286","https://openalex.org/W2987774938","https://openalex.org/W2965925734","https://openalex.org/W2770234245","https://openalex.org/W2566006169","https://openalex.org/W2515319207"],"abstract_inverted_index":{"Self-supervised":[0],"learning":[1,66],"enables":[2],"the":[3,10,35,53,92,96,99,103,120],"training":[4,100],"of":[5,94,98,105,134],"large":[6,113],"neural":[7,137],"models":[8,51],"without":[9],"need":[11],"for":[12,67,143],"large,":[13],"labeled":[14],"datasets.":[15],"It":[16],"has":[17],"been":[18],"generating":[19],"breakthroughs":[20],"in":[21,37,64,81,129,141,164],"several":[22,38],"fields,":[23],"including":[24,69],"computer":[25],"vision,":[26],"natural":[27],"language":[28],"processing,":[29],"biology,":[30],"and":[31,72,102,147],"speech.":[32],"In":[33],"particular,":[34],"state-of-the-art":[36],"speech":[39,45,144],"processing":[40,145],"applications,":[41],"such":[42],"as":[43],"automatic":[44],"recognition":[46],"or":[47],"speaker":[48],"identification,":[49],"are":[50],"where":[52],"latent":[54],"representation":[55,166],"is":[56],"learned":[57],"using":[58],"self-supervised":[59,65,114,165],"approaches.":[60,74],"Several":[61],"configurations":[62],"exist":[63],"speech,":[68],"contrastive,":[70],"predictive,":[71],"multilingual":[73],"There":[75],"is,":[76],"however,":[77],"a":[78],"crucial":[79],"limitation":[80],"most":[82],"existing":[83,135],"approaches:":[84],"their":[85],"high":[86,124,161],"computational":[87,162],"costs.":[88],"These":[89],"costs":[90,122,163],"limit":[91],"deployment":[93],"models,":[95,136],"size":[97],"dataset,":[101],"number":[104],"research":[106,111],"groups":[107],"that":[108,123],"can":[109],"afford":[110],"with":[112],"models.":[115],"Likewise,":[116],"we":[117],"should":[118],"consider":[119],"environmental":[121],"energy":[125],"consumption":[126],"implies.":[127],"Efforts":[128],"this":[130],"direction":[131],"comprise":[132],"optimization":[133],"architecture":[138],"efficiency,":[139],"improvements":[140],"finetuning":[142],"tasks,":[146],"data":[148],"efficiency.":[149],"But":[150],"despite":[151],"current":[152],"efforts,":[153],"more":[154],"work":[155],"could":[156],"be":[157],"done":[158],"to":[159],"address":[160],"learning.":[167]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4389984378","counts_by_year":[],"updated_date":"2025-02-23T04:05:06.082392","created_date":"2023-12-20"}