{"id":"https://openalex.org/W3203388655","doi":"https://doi.org/10.1109/icassp43922.2022.9746979","title":"Improving Confidence Estimation on Out-of-Domain Data for End-to-End Speech Recognition","display_name":"Improving Confidence Estimation on Out-of-Domain Data for End-to-End Speech Recognition","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W3203388655","doi":"https://doi.org/10.1109/icassp43922.2022.9746979","mag":"3203388655"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746979","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2110.03327","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030757951","display_name":"Qiujia Li","orcid":"https://orcid.org/0000-0003-3074-3692"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Qiujia Li","raw_affiliation_strings":["University of Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"University of Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100433648","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0002-9505-1833"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079902855","display_name":"David Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Qiu","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101319167","display_name":"Yanzhang He","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanzhang He","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103187717","display_name":"Liangliang Cao","orcid":"https://orcid.org/0000-0003-0900-1512"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liangliang Cao","raw_affiliation_strings":["Google LLC, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002191410","display_name":"Philip C. Woodland","orcid":"https://orcid.org/0000-0001-9069-0225"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Philip C. Woodland","raw_affiliation_strings":["University of Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"University of Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.482,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.658299,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":80,"max":83},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9961,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.8459503},{"id":"https://openalex.org/keywords/low-confidence","display_name":"Low Confidence","score":0.50580645}],"concepts":[{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.8459503},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.7831036},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.74910456},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.56379336},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.54745203},{"id":"https://openalex.org/C2909755999","wikidata":"https://www.wikidata.org/wiki/Q4751126","display_name":"Low Confidence","level":2,"score":0.50580645},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.46385905},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.445575},{"id":"https://openalex.org/C44249647","wikidata":"https://www.wikidata.org/wiki/Q208498","display_name":"Confidence interval","level":2,"score":0.435894},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4208306},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32367837},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1870893},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10209134},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.07159796},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746979","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2110.03327","pdf_url":"https://arxiv.org/pdf/2110.03327","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2110.03327","pdf_url":"https://arxiv.org/pdf/2110.03327","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.64,"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":42,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W173561343","https://openalex.org/W1795658042","https://openalex.org/W1828163288","https://openalex.org/W1966716734","https://openalex.org/W2034537249","https://openalex.org/W2095629250","https://openalex.org/W2121879602","https://openalex.org/W2134659216","https://openalex.org/W2143577772","https://openalex.org/W2159391028","https://openalex.org/W2161391345","https://openalex.org/W2251321385","https://openalex.org/W2280141299","https://openalex.org/W2594610113","https://openalex.org/W2626967530","https://openalex.org/W2791556425","https://openalex.org/W2898630520","https://openalex.org/W2936774411","https://openalex.org/W2950170869","https://openalex.org/W2963505832","https://openalex.org/W2964212410","https://openalex.org/W2970859221","https://openalex.org/W2995181338","https://openalex.org/W3036601975","https://openalex.org/W3093579165","https://openalex.org/W3094667432","https://openalex.org/W3094721881","https://openalex.org/W3097375352","https://openalex.org/W3099782249","https://openalex.org/W3101648800","https://openalex.org/W3150122400","https://openalex.org/W3160766462","https://openalex.org/W3161324588","https://openalex.org/W3163169798","https://openalex.org/W3196548933","https://openalex.org/W3197451691","https://openalex.org/W3198654230","https://openalex.org/W3199610983","https://openalex.org/W34303869","https://openalex.org/W4234016251","https://openalex.org/W854541894"],"related_works":["https://openalex.org/W4287591324","https://openalex.org/W4226420367","https://openalex.org/W3108503355","https://openalex.org/W3107204728","https://openalex.org/W3090555870","https://openalex.org/W2980176872","https://openalex.org/W2962876041","https://openalex.org/W2801655600","https://openalex.org/W2268150819","https://openalex.org/W2249953602"],"abstract_inverted_index":{"As":[0],"end-to-end":[1],"automatic":[2],"speech":[3,45],"recognition":[4],"(ASR)":[5],"models":[6,68],"reach":[7],"promising":[8],"performance,":[9],"various":[10],"downstream":[11],"tasks":[12],"rely":[13],"on":[14,72,104,121,135,152],"good":[15],"confidence":[16,27,60,67,102,133,147],"estimators":[17,28,61,103,148],"for":[18,163],"these":[19],"systems.":[20],"Recent":[21],"research":[22],"has":[23],"shown":[24],"that":[25,125],"model-based":[26,101],"have":[29],"a":[30,158],"significant":[31],"advantage":[32],"over":[33],"using":[34,107],"the":[35,40,44,54,58,73,78,89,100,126,132,145],"output":[36],"softmax":[37],"probabilities.":[38],"If":[39],"input":[41],"data":[42,76,154,164],"to":[43,81,98],"recogniser":[46],"is":[47,85],"from":[48],"mismatched":[49],"acoustic":[50],"and":[51,57,110,137,155],"linguistic":[52],"conditions,":[53],"ASR":[55,90,118],"performance":[56],"corresponding":[59],"may":[62],"exhibit":[63],"severe":[64],"degradation.":[65],"Since":[66],"are":[69,149],"often":[70],"trained":[71,120],"same":[74],"in-domain":[75,142],"as":[77],"ASR,":[79],"generalising":[80],"out-of-domain":[82],"(OOD)":[83],"scenarios":[84],"challenging.":[86],"By":[87],"keeping":[88],"model":[91,119],"untouched,":[92],"this":[93],"paper":[94],"proposes":[95],"two":[96],"approaches":[97],"improve":[99,131],"OOD":[105,113,153],"data:":[106],"pseudo":[108],"transcriptions":[109],"an":[111,117],"additional":[112],"language":[114],"model.":[115],"With":[116],"LibriSpeech,":[122],"experiments":[123],"show":[124],"proposed":[127],"methods":[128],"can":[129,156],"greatly":[130],"metrics":[134],"TED-LIUM":[136],"Switchboard":[138],"datasets":[139],"while":[140],"preserving":[141],"performance.":[143],"Furthermore,":[144],"improved":[146],"better":[150],"calibrated":[151],"provide":[157],"much":[159],"more":[160],"reliable":[161],"criterion":[162],"selection.":[165]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3203388655","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2}],"updated_date":"2025-01-03T21:13:06.774874","created_date":"2021-10-11"}