{"id":"https://openalex.org/W2015667537","doi":"https://doi.org/10.1109/21.21595","title":"Learning control of finite Markov chains with an explicit trade-off between estimation and control","display_name":"Learning control of finite Markov chains with an explicit trade-off between estimation and control","publication_year":1988,"publication_date":"1988-01-01","ids":{"openalex":"https://openalex.org/W2015667537","doi":"https://doi.org/10.1109/21.21595","mag":"2015667537"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/21.21595","pdf_url":null,"source":{"id":"https://openalex.org/S76152103","display_name":"IEEE Transactions on Systems Man and Cybernetics","issn_l":"0018-9472","issn":["0018-9472","2168-2909"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089733324","display_name":"Mitsuo Sat\u00f5","orcid":"https://orcid.org/0000-0003-4498-7428"},"institutions":[{"id":"https://openalex.org/I201537933","display_name":"Tohoku University","ror":"https://ror.org/01dq60k83","country_code":"JP","type":"education","lineage":["https://openalex.org/I201537933"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"M. Sato","raw_affiliation_strings":["Department of Electrical Engineering, University of Tohoku, Sendai, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, University of Tohoku, Sendai, Japan","institution_ids":["https://openalex.org/I201537933"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103145863","display_name":"K. Abe","orcid":"https://orcid.org/0009-0004-2279-382X"},"institutions":[{"id":"https://openalex.org/I136259955","display_name":"Toyohashi University of Technology","ror":"https://ror.org/04ezg6d83","country_code":"JP","type":"education","lineage":["https://openalex.org/I136259955"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"K. Abe","raw_affiliation_strings":["Department of Information and Computer Sciences, Toyohashi University of Technology, Toyoha, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Information and Computer Sciences, Toyohashi University of Technology, Toyoha, Japan","institution_ids":["https://openalex.org/I136259955"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013380810","display_name":"Hiroshi Takeda","orcid":"https://orcid.org/0000-0003-2566-4826"},"institutions":[{"id":"https://openalex.org/I201537933","display_name":"Tohoku University","ror":"https://ror.org/01dq60k83","country_code":"JP","type":"education","lineage":["https://openalex.org/I201537933"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"H. Takeda","raw_affiliation_strings":["Department of Electrical Engineering, University of Tohoku, Sendai, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, University of Tohoku, Sendai, Japan","institution_ids":["https://openalex.org/I201537933"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.153,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":33,"citation_normalized_percentile":{"value":0.846978,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":"18","issue":"5","first_page":"677","last_page":"684"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9892,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11178","display_name":"Receptor Mechanisms and Signaling","score":0.9863,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/maximization","display_name":"Maximization","score":0.71472216},{"id":"https://openalex.org/keywords/iterative-learning-control","display_name":"Iterative Learning Control","score":0.47119164}],"concepts":[{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.7436781},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.71472216},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.69423586},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.60813713},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5799035},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.5687096},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.56801426},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.5544707},{"id":"https://openalex.org/C117619785","wikidata":"https://www.wikidata.org/wiki/Q6094414","display_name":"Iterative learning control","level":3,"score":0.47119164},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4256008},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.41251075},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.41175866},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3581928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.16276589},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13490584},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.12598273},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/21.21595","pdf_url":null,"source":{"id":"https://openalex.org/S76152103","display_name":"IEEE Transactions on Systems Man and Cybernetics","issn_l":"0018-9472","issn":["0018-9472","2168-2909"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.68,"display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":15,"referenced_works":["https://openalex.org/W1968904046","https://openalex.org/W2013009405","https://openalex.org/W2021050453","https://openalex.org/W2045097767","https://openalex.org/W2046577860","https://openalex.org/W2064018461","https://openalex.org/W2089954532","https://openalex.org/W2099724000","https://openalex.org/W2115447855","https://openalex.org/W2115597380","https://openalex.org/W2123225686","https://openalex.org/W2124402837","https://openalex.org/W2164294821","https://openalex.org/W2320680700","https://openalex.org/W2329331109"],"related_works":["https://openalex.org/W4388738109","https://openalex.org/W4386994694","https://openalex.org/W4255368532","https://openalex.org/W2761624296","https://openalex.org/W2362901947","https://openalex.org/W2362086884","https://openalex.org/W2350210972","https://openalex.org/W2162286586","https://openalex.org/W187740018","https://openalex.org/W1606071314"],"abstract_inverted_index":{"An":[0],"efficient":[1],"scheme":[2,23,46,110],"is":[3,24,50,106],"presented":[4],"for":[5,33],"a":[6,47,57,64,129],"learning":[7],"control":[8,48,73,122],"problem":[9],"of":[10,56,68,74,88,101,121,133],"finite":[11],"Markov":[12],"chains":[13],"with":[14,18,38],"unknown":[15,19,70],"dynamics,":[16],"i.e.":[17],"transition":[20],"probabilities.":[21],"The":[22,77],"designed":[25],"to":[26,36,138],"optimize":[27],"the":[28,69,75,86,92,102,126,139],"asymptotic":[29],"system":[30],"performance":[31,59],"and":[32,42,72],"easy":[34],"application":[35],"models":[37],"relatively":[39],"many":[40],"states":[41],"decisions.":[43],"In":[44],"this":[45,109],"policy":[49,78],"determined":[51],"each":[52],"time":[53],"through":[54],"maximization":[55],"simple":[58],"criterion":[60],"that":[61,108,128],"explicitly":[62],"incorporates":[63],"tradeoff":[65],"between":[66],"estimation":[67],"probabilities":[71],"system.":[76],"determination":[79],"can":[80,95],"be":[81,96],"easily":[82],"performed":[83],"even":[84],"in":[85,125],"case":[87],"large-size":[89],"models,":[90],"since":[91],"maximizing":[93],"operation":[94],"greatly":[97],"simplified":[98],"by":[99,118],"use":[100],"policy-iteration":[103],"method.":[104],"It":[105],"proven":[107],"becomes":[111],"epsilon":[112],"-optimal":[113],"as":[114,116],"well":[115],"optimal":[117,135],"suitable":[119],"choice":[120],"parameter":[123],"values":[124],"sense":[127],"relative":[130],"frequency":[131],"coefficient":[132],"making":[134],"decisions":[136],"tends":[137],"maximum.<":[140],">":[143]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2015667537","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2025-01-17T06:57:19.316551","created_date":"2016-06-24"}