{"id":"https://openalex.org/W4225529283","doi":"https://doi.org/10.21437/interspeech.2022-11034","title":"Pseudo Label Is Better Than Human Label","display_name":"Pseudo Label Is Better Than Human Label","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4225529283","doi":"https://doi.org/10.21437/interspeech.2022-11034"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11034","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2203.12668","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014088530","display_name":"Dongseong Hwang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dongseong Hwang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032798707","display_name":"Khe Chai Sim","orcid":"https://orcid.org/0000-0002-0866-2223"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khe Chai Sim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069872293","display_name":"Zhouyuan Huo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhouyuan Huo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032931723","display_name":"Trevor Strohman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Trevor Strohman","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.409,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":15,"citation_normalized_percentile":{"value":0.658299,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1421","last_page":"1425"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9982,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.63407034},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription","score":0.56463915},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4565709},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.4289497}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8263136},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6352772},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.63407034},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.575681},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.57566607},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.56463915},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.53234845},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46016783},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4565709},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.4289497},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4254712},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41144037},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-11034","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2203.12668","pdf_url":"http://arxiv.org/pdf/2203.12668","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2203.12668","pdf_url":"http://arxiv.org/pdf/2203.12668","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.84}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":32,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1828163288","https://openalex.org/W2033256038","https://openalex.org/W2121879602","https://openalex.org/W2617258110","https://openalex.org/W2911544293","https://openalex.org/W2962369866","https://openalex.org/W2963122170","https://openalex.org/W2963747784","https://openalex.org/W2973049979","https://openalex.org/W3001197829","https://openalex.org/W3008181812","https://openalex.org/W3015194534","https://openalex.org/W3016234571","https://openalex.org/W3026041220","https://openalex.org/W3035160371","https://openalex.org/W3036601975","https://openalex.org/W3093579165","https://openalex.org/W3097777922","https://openalex.org/W3160628828","https://openalex.org/W3160766462","https://openalex.org/W3161324588","https://openalex.org/W3163203022","https://openalex.org/W3169320628","https://openalex.org/W3204397973","https://openalex.org/W3204696009","https://openalex.org/W4210690962","https://openalex.org/W4221161761","https://openalex.org/W4226033575","https://openalex.org/W4226380987","https://openalex.org/W4290711009","https://openalex.org/W4307023467"],"related_works":["https://openalex.org/W4398173524","https://openalex.org/W4386907265","https://openalex.org/W4221142855","https://openalex.org/W2594897229","https://openalex.org/W2293740626","https://openalex.org/W2151348424","https://openalex.org/W2118730790","https://openalex.org/W2058752985","https://openalex.org/W2050138804","https://openalex.org/W200981350"],"abstract_inverted_index":{"State-of-the-art":[0],"automatic":[1],"speech":[2,16],"recognition":[3],"(ASR)":[4],"systems":[5],"are":[6],"trained":[7,42],"with":[8,43],"tens":[9],"of":[10,12,14,30,38],"thousands":[11],"hours":[13],"labeled":[15],"data.Human":[17],"transcription":[18,32],"is":[19],"expensive":[20],"and":[21,28,68,78],"time":[22],"consuming.Factors":[23],"such":[24],"as":[25],"the":[26,31,36,39],"quality":[27,61],"consistency":[29],"can":[33,52,128],"greatly":[34],"affect":[35],"performance":[37],"ASR":[40],"models":[41],"these":[44],"data.In":[45],"this":[46,116],"paper,":[47],"we":[48,51,72,127],"show":[49,112],"that":[50,113],"train":[53,85],"a":[54,86,101,109,138],"strong":[55,117],"teacher":[56,82,91,118],"model":[57,93,119,140],"to":[58,84,120,135,142],"produce":[59],"high":[60],"pseudo":[62,123],"labels":[63,124],"by":[64,114],"utilizing":[65],"recent":[66],"self-supervised":[67],"semi-supervised":[69],"learning":[70],"techniques.Specifically,":[71],"use":[73],"JUST":[74],"(Joint":[75],"Unsupervised/Supervised":[76],"Training)":[77],"iterative":[79],"noisy":[80],"student":[81],"training":[83],"600":[87],"million":[88],"parameter":[89],"bi-directional":[90],"model.This":[92],"achieved":[94],"4.0%":[95],"word":[96],"error":[97],"rate":[98],"(WER)":[99],"on":[100],"voice":[102],"search":[103],"task,":[104],"11.1%":[105],"relatively":[106],"better":[107],"than":[108],"baseline.We":[110],"further":[111],"using":[115,143],"generate":[121],"high-quality":[122],"for":[125,137],"training,":[126],"achieve":[129],"13.6%":[130],"relative":[131],"WER":[132],"reduction":[133],"(5.9%":[134],"5.1%)":[136],"streaming":[139],"compared":[141],"human":[144],"labels.":[145]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4225529283","counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":11}],"updated_date":"2024-12-11T20:27:49.469968","created_date":"2022-05-05"}