{"id":"https://openalex.org/W2802514216","doi":"https://doi.org/10.1109/icassp.2018.8462578","title":"An End-to-End Approach to Joint Social Signal Detection and Automatic Speech Recognition","display_name":"An End-to-End Approach to Joint Social Signal Detection and Automatic Speech Recognition","publication_year":2018,"publication_date":"2018-04-01","ids":{"openalex":"https://openalex.org/W2802514216","doi":"https://doi.org/10.1109/icassp.2018.8462578","mag":"2802514216"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012410751","display_name":"Hirofumi Lnaguma","orcid":null},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hirofumi Lnaguma","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102851028","display_name":"Masato Mimura","orcid":"https://orcid.org/0000-0002-2403-0680"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masato Mimura","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058775745","display_name":"Koji Inoue","orcid":"https://orcid.org/0000-0002-2929-2559"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Koji Inoue","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067956319","display_name":"Kazuyoshi Yoshii","orcid":"https://orcid.org/0000-0001-8387-8609"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kazuyoshi Yoshii","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038044080","display_name":"Tatsuya Kawahara","orcid":"https://orcid.org/0000-0002-2686-2296"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tatsuya Kawahara","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Japan","institution_ids":["https://openalex.org/I22299242"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.061,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":3,"citation_normalized_percentile":{"value":0.492655,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":74,"max":77},"biblio":{"volume":null,"issue":null,"first_page":"6214","last_page":"6218"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9974,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9974,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9954,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9868,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription","score":0.6113974},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.58337885},{"id":"https://openalex.org/keywords/laughter","display_name":"Laughter","score":0.5809883},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.44855106}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7588345},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.73357284},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.69358444},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.6890745},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.6113974},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.58337885},{"id":"https://openalex.org/C2780775679","wikidata":"https://www.wikidata.org/wiki/Q170579","display_name":"Laughter","level":2,"score":0.5809883},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.5354006},{"id":"https://openalex.org/C177291462","wikidata":"https://www.wikidata.org/wiki/Q423038","display_name":"Active listening","level":2,"score":0.49936342},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.44855106},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3900272},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.37949988},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.17639226},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.084313035},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.47,"display_name":"Reduced inequalities"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":37,"referenced_works":["https://openalex.org/W1489125746","https://openalex.org/W1522301498","https://openalex.org/W1969761120","https://openalex.org/W1974310822","https://openalex.org/W1976697056","https://openalex.org/W1993232824","https://openalex.org/W2046477426","https://openalex.org/W2051363508","https://openalex.org/W2061436640","https://openalex.org/W2064675550","https://openalex.org/W2118748593","https://openalex.org/W2127141656","https://openalex.org/W2133564696","https://openalex.org/W2140615673","https://openalex.org/W2143612262","https://openalex.org/W2144005487","https://openalex.org/W2161274063","https://openalex.org/W2193413348","https://openalex.org/W2271840356","https://openalex.org/W2294873498","https://openalex.org/W2341051357","https://openalex.org/W2395416438","https://openalex.org/W2396703939","https://openalex.org/W2397647387","https://openalex.org/W2403021840","https://openalex.org/W2404132853","https://openalex.org/W2404856031","https://openalex.org/W2515113348","https://openalex.org/W2565542385","https://openalex.org/W2750497738","https://openalex.org/W2962826786","https://openalex.org/W2963211739","https://openalex.org/W2963729456","https://openalex.org/W3034729383","https://openalex.org/W4211153864","https://openalex.org/W4235534324","https://openalex.org/W45820414"],"related_works":["https://openalex.org/W642007152","https://openalex.org/W4294771049","https://openalex.org/W3126788496","https://openalex.org/W2552102772","https://openalex.org/W2401827384","https://openalex.org/W2355290951","https://openalex.org/W2168417340","https://openalex.org/W2103239478","https://openalex.org/W2052688117","https://openalex.org/W1523214805"],"abstract_inverted_index":{"Social":[0],"signals":[1,59],"such":[2,42],"as":[3,39,43,123,125],"laughter":[4],"and":[5,13,33,91],"fillers":[6],"are":[7],"often":[8],"observed":[9],"in":[10,18],"natural":[11],"conversation,":[12],"they":[14],"play":[15],"various":[16],"roles":[17],"human-to-human":[19],"communication.":[20],"Detecting":[21],"these":[22],"events":[23],"is":[24,69],"useful":[25],"for":[26,34],"transcription":[27,32],"systems":[28,36],"to":[29,37,55,120],"generate":[30],"rich":[31],"dialogue":[35],"behave":[38],"we":[40,80],"do":[41],"synchronized":[44],"laughing":[45],"or":[46],"attentive":[47],"listening.":[48],"We":[49,96],"have":[50],"studied":[51],"an":[52],"end-to-end":[53,73,110],"approach":[54],"directly":[56],"detect":[57],"social":[58,87,103],"from":[60],"speech":[61,93],"by":[62],"using":[63],"connectionist":[64],"temporal":[65],"classification":[66],"(CTC),":[67],"which":[68],"one":[70],"of":[71],"the":[72,114,126],"sequence":[74],"labelling":[75,100],"models.":[76],"In":[77],"this":[78],"work,":[79],"propose":[81],"a":[82],"unified":[83],"framework":[84,111],"that":[85,108],"integrates":[86],"signal":[88],"detection":[89],"(SSD)":[90],"automatic":[92],"recognition":[94],"(ASR).":[95],"investigate":[97],"several":[98],"reference":[99],"methods":[101],"regarding":[102],"signals.":[104],"Experimental":[105],"evaluations":[106],"demonstrate":[107],"our":[109],"significantly":[112],"outperforms":[113],"conventional":[115],"DNN-HMM":[116],"system":[117],"with":[118],"regard":[119],"SSD":[121],"performance":[122],"well":[124],"character":[127],"error":[128],"rate":[129],"(CER).":[130]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2802514216","counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2024-12-11T20:09:21.655305","created_date":"2018-05-17"}