{"id":"https://openalex.org/W2998050631","doi":"https://doi.org/10.1609/aaai.v34i04.6021","title":"Adaptive Trust Region Policy Optimization: Global Convergence and Faster Rates for Regularized MDPs","display_name":"Adaptive Trust Region Policy Optimization: Global Convergence and Faster Rates for Regularized MDPs","publication_year":2020,"publication_date":"2020-04-03","ids":{"openalex":"https://openalex.org/W2998050631","doi":"https://doi.org/10.1609/aaai.v34i04.6021","mag":"2998050631"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v34i04.6021","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/6021/5877","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/6021/5877","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049062714","display_name":"Lior Shani","orcid":"https://orcid.org/0000-0003-1504-0534"},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Lior Shani","raw_affiliation_strings":["Technion"],"affiliations":[{"raw_affiliation_string":"Technion","institution_ids":["https://openalex.org/I174306211"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090891199","display_name":"Yonathan Efroni","orcid":null},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Yonathan Efroni","raw_affiliation_strings":["Technion"],"affiliations":[{"raw_affiliation_string":"Technion","institution_ids":["https://openalex.org/I174306211"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036260775","display_name":"Shie Mannor","orcid":"https://orcid.org/0000-0003-4439-7647"},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Shie Mannor","raw_affiliation_strings":["Technion"],"affiliations":[{"raw_affiliation_string":"Technion","institution_ids":["https://openalex.org/I174306211"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.465,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":82,"citation_normalized_percentile":{"value":0.999907,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"34","issue":"04","first_page":"5668","last_page":"5675"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.98,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.98,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/trust-region","display_name":"Trust region","score":0.6887684}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.79257274},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.74771434},{"id":"https://openalex.org/C89109886","wikidata":"https://www.wikidata.org/wiki/Q1535924","display_name":"Trust region","level":3,"score":0.6887684},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.65567434},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6301227},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.59436524},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5111404},{"id":"https://openalex.org/C112680207","wikidata":"https://www.wikidata.org/wiki/Q714886","display_name":"Regular polygon","level":2,"score":0.5073318},{"id":"https://openalex.org/C57869625","wikidata":"https://www.wikidata.org/wiki/Q1783502","display_name":"Rate of convergence","level":3,"score":0.5010178},{"id":"https://openalex.org/C157972887","wikidata":"https://www.wikidata.org/wiki/Q463359","display_name":"Convex optimization","level":3,"score":0.42342985},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20990333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14082399},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.080437094},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C178635117","wikidata":"https://www.wikidata.org/wiki/Q747499","display_name":"RADIUS","level":2,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v34i04.6021","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/6021/5877","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.02769","pdf_url":"http://arxiv.org/pdf/1909.02769","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v34i04.6021","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/6021/5877","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":26,"referenced_works":["https://openalex.org/W107583932","https://openalex.org/W1575592356","https://openalex.org/W1771410628","https://openalex.org/W1998880679","https://openalex.org/W2016384870","https://openalex.org/W2094387729","https://openalex.org/W2112964839","https://openalex.org/W2115738253","https://openalex.org/W2119579400","https://openalex.org/W2121863487","https://openalex.org/W2155027007","https://openalex.org/W2736601468","https://openalex.org/W2763081248","https://openalex.org/W2914920107","https://openalex.org/W2949996623","https://openalex.org/W2962821147","https://openalex.org/W2963267001","https://openalex.org/W2963849886","https://openalex.org/W2964043796","https://openalex.org/W3046626913","https://openalex.org/W3096194929","https://openalex.org/W3117137507","https://openalex.org/W4288358463","https://openalex.org/W4297814233","https://openalex.org/W4298857904","https://openalex.org/W4320473289"],"related_works":["https://openalex.org/W4385488867","https://openalex.org/W3148822801","https://openalex.org/W2752681920","https://openalex.org/W2393042414","https://openalex.org/W2353911672","https://openalex.org/W2080108722","https://openalex.org/W2062145486","https://openalex.org/W2058731384","https://openalex.org/W2010584785","https://openalex.org/W1997473290"],"abstract_inverted_index":{"Trust":[0],"region":[1],"policy":[2,11],"optimization":[3],"(TRPO)":[4],"is":[5,33,60,137],"a":[6,20,41],"popular":[7],"and":[8,90,100],"empirically":[9],"successful":[10],"search":[12],"algorithm":[13,43],"in":[14,18,58,61,78,82,119,133,141],"Reinforcement":[15],"Learning":[16],"(RL)":[17],"which":[19,83,123],"surrogate":[21],"problem,":[22],"that":[23,52],"restricts":[24],"consecutive":[25],"policies":[26],"to":[27,30,87,105,116],"be":[28],"\u2018close\u2019":[29],"one":[31],"another,":[32],"iteratively":[34],"solved.":[35],"Nevertheless,":[36],"TRPO":[37,59,77,99,118],"has":[38],"been":[39],"considered":[40],"heuristic":[42],"inspired":[44],"by":[45],"Conservative":[46],"Policy":[47],"Iteration":[48],"(CPI).":[49],"We":[50,74],"show":[51],"the":[53,63,79,88,91,106,110,138,148],"adaptive":[54,111],"scaling":[55,112],"mechanism":[56,113],"used":[57],"fact":[62],"natural":[64],"\u201cRL":[65],"version\u201d":[66],"of":[67,128,143],"traditional":[68],"trust-region":[69],"methods":[70],"from":[71],"convex":[72,134],"analysis.":[73],"first":[75,139],"analyze":[76,117],"planning":[80],"setting,":[81],"we":[84,96,124],"have":[85],"access":[86],"model":[89],"entire":[92],"state":[93],"space.":[94],"Then,":[95],"consider":[97],"sample-based":[98],"establish":[101],"\u00d5(1/\u221aN)":[102],"convergence":[103],"rate":[104],"global":[107],"optimum.":[108],"Importantly,":[109],"allows":[114],"us":[115],"regularized":[120],"MDPs":[121],"for":[122],"prove":[125],"fast":[126],"rates":[127,145],"\u00d5(1/N),":[129],"much":[130],"like":[131],"results":[132],"optimization.":[135],"This":[136],"result":[140],"RL":[142],"better":[144],"when":[146],"regularizing":[147],"instantaneous":[149],"cost":[150],"or":[151],"reward.":[152]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2998050631","counts_by_year":[{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":16},{"year":2021,"cited_by_count":26},{"year":2020,"cited_by_count":16},{"year":2019,"cited_by_count":2}],"updated_date":"2025-01-07T15:25:05.014054","created_date":"2020-01-10"}