{"id":"https://openalex.org/W4320560508","doi":"https://doi.org/10.48550/arxiv.2302.05372","title":"Towards Minimax Optimality of Model-based Robust Reinforcement Learning","display_name":"Towards Minimax Optimality of Model-based Robust Reinforcement Learning","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4320560508","doi":"https://doi.org/10.48550/arxiv.2302.05372"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2302.05372","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2302.05372","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113950734","display_name":"Pierre J. Clavier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Clavier, Pierre","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071977875","display_name":"Erwan Le Pennec","orcid":"https://orcid.org/0000-0002-7988-7999"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pennec, Erwan Le","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5110482875","display_name":"Matthieu Geist","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Geist, Matthieu","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9869,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9869,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9636,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9547,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sample-complexity","display_name":"Sample complexity","score":0.5301472},{"id":"https://openalex.org/keywords/ball","display_name":"Ball (mathematics)","score":0.44483066},{"id":"https://openalex.org/keywords/tilde","display_name":"Tilde","score":0.42318773}],"concepts":[{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.7468196},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6431194},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.6249751},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.5301472},{"id":"https://openalex.org/C122041747","wikidata":"https://www.wikidata.org/wiki/Q838611","display_name":"Ball (mathematics)","level":2,"score":0.44483066},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.42568803},{"id":"https://openalex.org/C36686422","wikidata":"https://www.wikidata.org/wiki/Q11167","display_name":"Tilde","level":2,"score":0.42318773},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.41752195},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.36491635},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.34085602},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.22887594},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.17770115},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.13524958},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12081644},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.08995125}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2302.05372","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2302.05372","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2302.05372","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.66,"display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3099159407","https://openalex.org/W3035780950","https://openalex.org/W3035454135","https://openalex.org/W2995519751","https://openalex.org/W2990709181","https://openalex.org/W2985982678","https://openalex.org/W2911931139","https://openalex.org/W2803304514","https://openalex.org/W2097562045","https://openalex.org/W1970303738"],"abstract_inverted_index":{"We":[0],"study":[1,138],"the":[2,25,34,69,82,106,112,116,119,134,139,151,160,164,181,199,202,209,221,224,228,236,239],"sample":[3,85,140,170,210],"complexity":[4,86,141,171,211],"of":[5,24,142,172,187,201,238],"obtaining":[6],"an":[7,47,60,131,155],"$\\epsilon$-optimal":[8,61],"policy":[9],"in":[10,33,68],"\\emph{Robust}":[11],"discounted":[12],"Markov":[13],"Decision":[14],"Processes":[15],"(RMDPs),":[16],"given":[17],"only":[18],"access":[19],"to":[20,46,154,212],"a":[21,169,232],"generative":[22,161],"model":[23],"nominal":[26],"kernel.":[27],"This":[28],"problem":[29],"is":[30,39,64,87,109,204,241],"widely":[31],"studied":[32],"non-robust":[35,225],"case,":[36,166],"and":[37,104,137,183,191,231],"it":[38],"known":[40,84],"that":[41],"any":[42],"planning":[43,144],"approach":[44],"applied":[45,153],"empirical":[48,156],"MDP":[49],"estimated":[50,158],"with":[51,130],"$\\tilde{\\mathcal{O}}(\\frac{H^3":[52,213],"\\mid":[53,89,96,174,214,218],"S":[54,90,97,175,189,193,215],"\\mid\\mid":[55,176,194,216],"A":[56,92,99,177,195,217],"\\mid}{\\epsilon^2})$":[57,93,178],"samples":[58],"provides":[59],"policy,":[62],"which":[63],"minimax":[65],"optimal.":[66],"Results":[67],"robust":[70,233],"case":[71,226],"are":[72],"much":[73],"more":[74],"scarce.":[75],"For":[76],"$sa$-":[77,182],"(resp":[78],"$s$-)rectangular":[79],"uncertainty":[80,107,127,203,240],"sets,":[81],"best":[83],"$\\tilde{\\mathcal{O}}(\\frac{H^4":[88,95,173],"\\mid^2\\mid":[91,98],"(resp.":[94],"\\mid^2}{\\epsilon^2})$),":[100],"for":[101,179,223,227],"specific":[102],"algorithms":[103],"when":[105,235],"set":[108],"based":[110],"on":[111,150],"total":[113],"variation":[114],"(TV),":[115],"KL":[117],"or":[118],"Chi-square":[120],"divergences.":[121],"In":[122,163],"this":[123],"paper,":[124],"we":[125,167,207],"consider":[126],"sets":[128],"defined":[129],"$L_p$-ball":[132],"(recovering":[133],"TV":[135],"case),":[136],"\\emph{any}":[143],"algorithm":[145],"(with":[146],"high":[147],"accuracy":[148],"guarantee":[149],"solution)":[152],"RMDP":[157],"using":[159],"model.":[162],"general":[165],"prove":[168],"both":[180],"$s$-rectangular":[184],"cases":[185],"(improvements":[186],"$\\mid":[188,192],"\\mid$":[190,196],"respectively).":[197],"When":[198],"size":[200,237],"small":[205,242],"enough,":[206],"improve":[208],"}{\\epsilon^2})$,":[219],"recovering":[220],"lower-bound":[222,234],"first":[229],"time":[230],"enough.":[243]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4320560508","counts_by_year":[],"updated_date":"2025-01-04T17:38:02.775055","created_date":"2023-02-15"}