{"id":"https://openalex.org/W1533416326","doi":"https://doi.org/10.1109/icassp.2015.7178778","title":"Learning acoustic frame labeling for speech recognition with recurrent neural networks","display_name":"Learning acoustic frame labeling for speech recognition with recurrent neural networks","publication_year":2015,"publication_date":"2015-04-01","ids":{"openalex":"https://openalex.org/W1533416326","doi":"https://doi.org/10.1109/icassp.2015.7178778","mag":"1533416326"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2015.7178778","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108537573","display_name":"Ha\u015fim Sak","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hasim Sak","raw_affiliation_strings":["Google, USA."],"affiliations":[{"raw_affiliation_string":"Google, USA.","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079708487","display_name":"Andrew Senior","orcid":"https://orcid.org/0000-0002-2401-5691"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew Senior","raw_affiliation_strings":["Google, USA."],"affiliations":[{"raw_affiliation_string":"Google, USA.","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111514705","display_name":"Kanishka Rao","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kanishka Rao","raw_affiliation_strings":["Google, USA."],"affiliations":[{"raw_affiliation_string":"Google, USA.","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088262368","display_name":"Ozan \u0130rsoy","orcid":"https://orcid.org/0000-0002-7123-8361"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ozan Irsoy","raw_affiliation_strings":["Google, USA."],"affiliations":[{"raw_affiliation_string":"Google, USA.","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043473089","display_name":"Alex Graves","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex Graves","raw_affiliation_strings":["Google, USA."],"affiliations":[{"raw_affiliation_string":"Google, USA.","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110711540","display_name":"Fran\u00e7oise Beaufays","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Francoise Beaufays","raw_affiliation_strings":["Google, USA."],"affiliations":[{"raw_affiliation_string":"Google, USA.","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081836495","display_name":"Johan Schalkwyk","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"funder","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Johan Schalkwyk","raw_affiliation_strings":["Google, USA."],"affiliations":[{"raw_affiliation_string":"Google, USA.","institution_ids":["https://openalex.org/I1291425158"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":14.095,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":189,"citation_normalized_percentile":{"value":0.953626,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"4280","last_page":"4284"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.77629995},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.52928597}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8338423},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.8056259},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.77629995},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.75678205},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.75175226},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5339519},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.52928597},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.50826186},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.50021696},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.49386722},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.49339452},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.48996225},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38214865},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.26503327},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2015.7178778","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.74,"display_name":"Reduced inequalities"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":24,"referenced_works":["https://openalex.org/W1499864241","https://openalex.org/W1574530145","https://openalex.org/W1978660892","https://openalex.org/W1987238397","https://openalex.org/W2005708641","https://openalex.org/W2114016253","https://openalex.org/W2120480077","https://openalex.org/W2125838338","https://openalex.org/W2127141656","https://openalex.org/W2131342762","https://openalex.org/W2131774270","https://openalex.org/W2143612262","https://openalex.org/W2147768505","https://openalex.org/W2158373110","https://openalex.org/W2160815625","https://openalex.org/W2168231600","https://openalex.org/W2253807446","https://openalex.org/W2293009711","https://openalex.org/W2293634267","https://openalex.org/W2296748324","https://openalex.org/W2394932179","https://openalex.org/W2397682113","https://openalex.org/W2403195671","https://openalex.org/W811578723"],"related_works":["https://openalex.org/W4389116644","https://openalex.org/W4205841273","https://openalex.org/W4205525690","https://openalex.org/W3103844505","https://openalex.org/W2965546495","https://openalex.org/W2153315159","https://openalex.org/W2121652828","https://openalex.org/W1997922073","https://openalex.org/W1761388607","https://openalex.org/W107086600"],"abstract_inverted_index":{"We":[0,53,65,78,96],"explore":[1],"alternative":[2],"acoustic":[3,20,75,86],"modeling":[4,76],"techniques":[5],"for":[6,48,142],"large":[7],"vocabulary":[8],"speech":[9],"recognition":[10],"using":[11,33,105,120],"Long":[12],"Short-Term":[13],"Memory":[14],"recurrent":[15],"neural":[16],"networks.":[17],"For":[18],"an":[19,113],"frame":[21],"labeling":[22,49],"task,":[23],"we":[24,125],"compare":[25],"the":[26,41,56,81,128,139],"conventional":[27],"approach":[28],"of":[29,36,83,130,145],"cross-entropy":[30],"(CE)":[31],"training":[32,89,133,144],"fixed":[34],"forced-alignments":[35],"frames":[37],"and":[38,69,91,137],"labels,":[39],"with":[40,61,67,118],"Connectionist":[42],"Temporal":[43],"Classification":[44],"(CTC)":[45],"method":[46],"proposed":[47],"unsegmented":[50],"sequence":[51,131],"data.":[52],"demonstrate":[54],"that":[55,98],"latter":[57],"can":[58,108],"be":[59],"implemented":[60],"finite":[62],"state":[63,122],"transducers.":[64],"experiment":[66],"phones":[68],"context":[70,84],"dependent":[71],"HMM":[72,121],"states":[73],"as":[74,110,112],"units.":[77],"also":[79,126],"investigate":[80],"effect":[82,129],"in":[85],"input":[87],"by":[88],"unidirectional":[90],"bidirectional":[92,100],"LSTM":[93,101,114],"RNN":[94,102,115],"models.":[95,147],"show":[97,127,138],"a":[99],"CTC":[103,146],"model":[104,116],"phone":[106],"units":[107],"perform":[109],"well":[111],"trained":[117],"CE":[119],"alignments.":[123],"Finally,":[124],"discriminative":[132],"on":[134],"these":[135],"models":[136],"first":[140],"results":[141],"sMBR":[143]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W1533416326","counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":15},{"year":2020,"cited_by_count":22},{"year":2019,"cited_by_count":24},{"year":2018,"cited_by_count":35},{"year":2017,"cited_by_count":38},{"year":2016,"cited_by_count":21},{"year":2015,"cited_by_count":11}],"updated_date":"2025-04-19T11:50:42.767599","created_date":"2016-06-24"}