{"id":"https://openalex.org/W2054716580","doi":"https://doi.org/10.1109/jstsp.2012.2229257","title":"A Comprehensive Reinforcement Learning Framework for Dialogue Management Optimization","display_name":"A Comprehensive Reinforcement Learning Framework for Dialogue Management Optimization","publication_year":2012,"publication_date":"2012-11-21","ids":{"openalex":"https://openalex.org/W2054716580","doi":"https://doi.org/10.1109/jstsp.2012.2229257","mag":"2054716580"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2012.2229257","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072253501","display_name":"Lucie Daubigney","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lucie Daubigney","raw_affiliation_strings":["Autonomous intelligent machine","IMS : Information, Multimodalit\u00e9 & Signal"],"affiliations":[{"raw_affiliation_string":"IMS : Information, Multimodalit\u00e9 & Signal","institution_ids":[]},{"raw_affiliation_string":"Autonomous intelligent machine","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110482875","display_name":"Matthieu Geist","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matthieu Geist","raw_affiliation_strings":["IMS : Information, Multimodalit\u00e9 & Signal"],"affiliations":[{"raw_affiliation_string":"IMS : Information, Multimodalit\u00e9 & Signal","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031626252","display_name":"Senthilkumar Chandramohan","orcid":"https://orcid.org/0000-0002-8909-5485"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Senthilkumar Chandramohan","raw_affiliation_strings":["IMS : Information, Multimodalit\u00e9 & Signal"],"affiliations":[{"raw_affiliation_string":"IMS : Information, Multimodalit\u00e9 & Signal","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065100569","display_name":"Olivier Pietquin","orcid":"https://orcid.org/0000-0002-5386-465X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olivier Pietquin","raw_affiliation_strings":["IMS : Information, Multimodalit\u00e9 & Signal"],"affiliations":[{"raw_affiliation_string":"IMS : Information, Multimodalit\u00e9 & Signal","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":7.942,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":55,"citation_normalized_percentile":{"value":0.927984,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"6","issue":"8","first_page":"891","last_page":"902"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8283466},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7863227},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.575557},{"id":"https://openalex.org/C2778496695","wikidata":"https://www.wikidata.org/wiki/Q254128","display_name":"Dilemma","level":2,"score":0.51509255},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.448954},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.36816674},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.090699136},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2012.2229257","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://hal-centralesupelec.archives-ouvertes.fr/hal-00771646","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://centralesupelec.hal.science/hal-00771646","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://hal-supelec.archives-ouvertes.fr/hal-00771646","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.41}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":48,"referenced_works":["https://openalex.org/W117128830","https://openalex.org/W1211946649","https://openalex.org/W1485515110","https://openalex.org/W1547105496","https://openalex.org/W160067033","https://openalex.org/W1749494163","https://openalex.org/W1778387566","https://openalex.org/W196536594","https://openalex.org/W2021151961","https://openalex.org/W2035934535","https://openalex.org/W2040123554","https://openalex.org/W2047192163","https://openalex.org/W2062175565","https://openalex.org/W2076337359","https://openalex.org/W2099118758","https://openalex.org/W2101308260","https://openalex.org/W2105934661","https://openalex.org/W2106547558","https://openalex.org/W2107961737","https://openalex.org/W2109038907","https://openalex.org/W2109504867","https://openalex.org/W2112476714","https://openalex.org/W2115101920","https://openalex.org/W2117989772","https://openalex.org/W2131600418","https://openalex.org/W2132997613","https://openalex.org/W2134051188","https://openalex.org/W2134466368","https://openalex.org/W2139418546","https://openalex.org/W2150923691","https://openalex.org/W2151814822","https://openalex.org/W2151904921","https://openalex.org/W2154740693","https://openalex.org/W2156974606","https://openalex.org/W2158984235","https://openalex.org/W2163068732","https://openalex.org/W2168490009","https://openalex.org/W2169430966","https://openalex.org/W2172501646","https://openalex.org/W2180877127","https://openalex.org/W2400904432","https://openalex.org/W264301034","https://openalex.org/W2787259794","https://openalex.org/W368409423","https://openalex.org/W4214717370","https://openalex.org/W4242606736","https://openalex.org/W4285719527","https://openalex.org/W52170320"],"related_works":["https://openalex.org/W4386462264","https://openalex.org/W4313488044","https://openalex.org/W4312192474","https://openalex.org/W4306674287","https://openalex.org/W4210805261","https://openalex.org/W3209574120","https://openalex.org/W3170094116","https://openalex.org/W3107602296","https://openalex.org/W3046775127","https://openalex.org/W2961085424"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"is":[2],"now":[3],"an":[4],"acknowledged":[5],"approach":[6],"for":[7],"optimizing":[8],"the":[9,17,43,83,94,101],"interaction":[10],"strategy":[11,105],"of":[12,103],"spoken":[13],"dialogue":[14,65,79,104,133],"systems.":[15],"If":[16],"first":[18],"considered":[19],"algorithms":[20,52],"were":[21],"quite":[22],"basic":[23],"(like":[24],"SARSA),":[25],"recent":[26],"works":[27],"concentrated":[28],"on":[29,129],"more":[30],"sophisticated":[31],"methods.":[32],"More":[33],"attention":[34],"has":[35],"been":[36,54,62],"paid":[37],"to":[38,56,64,92,100,109],"off-policy":[39],"learning,":[40],"dealing":[41],"with":[42,118],"exploration-exploitation":[44],"dilemma,":[45],"sample":[46],"efficiency":[47],"or":[48],"handling":[49],"non-stationarity.":[50],"New":[51],"have":[53,61],"proposed":[55],"address":[57,110],"these":[58,112],"issues":[59,113],"and":[60,137],"applied":[63],"management.":[66],"However,":[67],"each":[68],"algorithm":[69],"often":[70],"solves":[71],"a":[72,76,115,119],"single":[73,120],"issue":[74],"at":[75,85],"time,":[77],"while":[78],"systems":[80],"exhibit":[81],"all":[82,111],"problems":[84],"once.":[86],"In":[87],"this":[88],"paper,":[89],"we":[90],"propose":[91],"apply":[93],"Kalman":[95],"Temporal":[96],"Differences":[97],"(KTD)":[98],"framework":[99],"problem":[102],"optimization":[106],"so":[107],"as":[108],"in":[114],"comprehensive":[116],"manner":[117],"framework.":[121],"Our":[122],"claims":[123],"are":[124],"illustrated":[125],"by":[126],"experiments":[127],"led":[128],"two":[130],"real-world":[131],"goal-oriented":[132],"management":[134],"frameworks,":[135],"DIPPER":[136],"HIS.":[138]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2054716580","counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":10},{"year":2015,"cited_by_count":10},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":6}],"updated_date":"2024-12-09T11:50:02.332292","created_date":"2016-06-24"}