{"id":"https://openalex.org/W2033256038","doi":"https://doi.org/10.1109/asru.2013.6707758","title":"Large scale deep neural network acoustic modeling with semi-supervised training data for YouTube video transcription","display_name":"Large scale deep neural network acoustic modeling with semi-supervised training data for YouTube video transcription","publication_year":2013,"publication_date":"2013-12-01","ids":{"openalex":"https://openalex.org/W2033256038","doi":"https://doi.org/10.1109/asru.2013.6707758","mag":"2033256038"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2013.6707758","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/41403.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110698977","display_name":"Hank Liao","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hank Liao","raw_affiliation_strings":["Google (United States), Mountain View, United States"],"affiliations":[{"raw_affiliation_string":"Google (United States), Mountain View, United States","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030109648","display_name":"Erik McDermott","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Erik McDermott","raw_affiliation_strings":["Google\u2020#TAB#"],"affiliations":[{"raw_affiliation_string":"Google\u2020#TAB#","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079708487","display_name":"Andrew Senior","orcid":"https://orcid.org/0000-0002-2401-5691"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew Senior","raw_affiliation_strings":["Google (United States), Mountain View, United States"],"affiliations":[{"raw_affiliation_string":"Google (United States), Mountain View, United States","institution_ids":["https://openalex.org/I1291425158"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":7.365,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":201,"citation_normalized_percentile":{"value":0.961899,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech Enhancement Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech Enhancement Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Audio Signal Classification and Analysis","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition Technology","score":0.9992,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/upload","display_name":"Upload","score":0.561037},{"id":"https://openalex.org/keywords/audio-visual-speech-recognition","display_name":"Audio-Visual Speech Recognition","score":0.558833},{"id":"https://openalex.org/keywords/acoustic-modeling","display_name":"Acoustic Modeling","score":0.53112},{"id":"https://openalex.org/keywords/automatic-speech-recognition","display_name":"Automatic Speech Recognition","score":0.523894},{"id":"https://openalex.org/keywords/audio-event-detection","display_name":"Audio Event Detection","score":0.513299},{"id":"https://openalex.org/keywords/end-to-end-speech-recognition","display_name":"End-to-End Speech Recognition","score":0.503494},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.42687774},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.4150579}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83266723},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6000753},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5994462},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5897782},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.561037},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.48555157},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.47459766},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.4623704},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43749654},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4279908},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.42687774},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.4150579},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3527185},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.28063622},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.11903098},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2013.6707758","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.675.5577","pdf_url":"http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/41403.pdf","source":{"id":"https://openalex.org/S4306400349","display_name":"CiteSeer X (The Pennsylvania State University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I130769515","host_organization_name":"Pennsylvania State University","host_organization_lineage":["https://openalex.org/I130769515"],"host_organization_lineage_names":["Pennsylvania State University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.675.5577","pdf_url":"http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/41403.pdf","source":{"id":"https://openalex.org/S4306400349","display_name":"CiteSeer X (The Pennsylvania State University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I130769515","host_organization_name":"Pennsylvania State University","host_organization_lineage":["https://openalex.org/I130769515"],"host_organization_lineage_names":["Pennsylvania State University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.49,"display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":25,"referenced_works":["https://openalex.org/W1218987319","https://openalex.org/W137106866","https://openalex.org/W1498436455","https://openalex.org/W1566315437","https://openalex.org/W1975113979","https://openalex.org/W1987238397","https://openalex.org/W1993882792","https://openalex.org/W2000200144","https://openalex.org/W2010362084","https://openalex.org/W2058641082","https://openalex.org/W2076048613","https://openalex.org/W2076794394","https://openalex.org/W2105961775","https://openalex.org/W2109664771","https://openalex.org/W2134659216","https://openalex.org/W2144792281","https://openalex.org/W2147768505","https://openalex.org/W2158069733","https://openalex.org/W2160306971","https://openalex.org/W2171928131","https://openalex.org/W217970951","https://openalex.org/W2296748324","https://openalex.org/W2394932179","https://openalex.org/W2406262283","https://openalex.org/W50384900"],"related_works":["https://openalex.org/W767271433","https://openalex.org/W4290708361","https://openalex.org/W4221142855","https://openalex.org/W2594897229","https://openalex.org/W2244609359","https://openalex.org/W2151348424","https://openalex.org/W2129812225","https://openalex.org/W2126322296","https://openalex.org/W2050138804","https://openalex.org/W1566315437"],"abstract_inverted_index":{"YouTube":[0,45,82],"is":[1,36],"a":[2,161],"highly":[3],"visited":[4],"video":[5,18,119],"sharing":[6],"website":[7],"where":[8],"over":[9],"one":[10,73],"billion":[11,15],"people":[12],"watch":[13],"six":[14],"hours":[16],"of":[17,40,74,87,117,140],"every":[19],"month.":[20],"Improving":[21],"accessibility":[22],"to":[23,93,109,121,144,175],"these":[24],"videos":[25,46,91],"for":[26,31,50,90,182],"the":[27,75,97,110,115,151],"hearing":[28],"impaired":[29],"and":[30,33,127,149],"search":[32],"indexing":[34],"purposes":[35],"an":[37,138],"excellent":[38],"application":[39],"automatic":[41,51,85],"speech":[42,52],"recognition.":[43],"However,":[44],"are":[47],"extremely":[48],"challenging":[49],"recognition":[53],"systems.":[54],"Standard":[55],"adapted":[56],"Gaussian":[57],"Mixture":[58],"Model":[59],"(GMM)":[60],"based":[61],"acoustic":[62,131],"models":[63,132],"can":[64],"have":[65,94],"word":[66],"error":[67],"rates":[68],"above":[69],"50%,":[70],"making":[71],"this":[72,183],"most":[76],"difficult":[77],"reported":[78,177],"tasks.":[79],"Since":[80],"2009,":[81],"has":[83],"provided":[84],"generation":[86],"closed":[88],"captions":[89],"detected":[92],"English":[95],"speech;":[96],"service":[98],"now":[99],"supports":[100],"ten":[101],"different":[102],"languages.":[103],"This":[104],"paper":[105],"describes":[106],"recent":[107],"improvements":[108],"original":[111],"system,":[112],"in":[113],"particular":[114],"use":[116],"owner-uploaded":[118],"transcripts":[120],"generate":[122],"additional":[123],"semi-supervised":[124],"training":[125,147],"data":[126],"deep":[128],"neural":[129],"networks":[130],"with":[133,160],"large":[134],"state":[135],"inventories.":[136],"Applying":[137],"\u201cisland":[139],"confidence\u201d":[141],"filtering":[142],"heuristic":[143],"select":[145],"useful":[146],"segments,":[148],"increasing":[150],"model":[152],"size":[153],"by":[154,170],"using":[155],"44,526":[156],"context":[157],"dependent":[158],"states":[159],"low-rank":[162],"final":[163],"layer":[164],"weight":[165],"matrix":[166],"approximation,":[167],"improved":[168],"performance":[169],"about":[171],"13%":[172],"relative":[173],"compared":[174],"previously":[176],"sequence":[178],"trained":[179],"DNN":[180],"results":[181],"task.":[184]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2033256038","counts_by_year":[{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":27},{"year":2020,"cited_by_count":27},{"year":2019,"cited_by_count":27},{"year":2018,"cited_by_count":22},{"year":2017,"cited_by_count":22},{"year":2016,"cited_by_count":12},{"year":2015,"cited_by_count":14},{"year":2014,"cited_by_count":10}],"updated_date":"2024-12-03T13:02:09.273264","created_date":"2016-06-24"}