{"id":"https://openalex.org/W4402952841","doi":"https://doi.org/10.48550/arxiv.2408.16286","title":"Near-Optimal Policy Identification in Robust Constrained Markov Decision\n Processes via Epigraph Form","display_name":"Near-Optimal Policy Identification in Robust Constrained Markov Decision\n Processes via Epigraph Form","publication_year":2024,"publication_date":"2024-08-29","ids":{"openalex":"https://openalex.org/W4402952841","doi":"https://doi.org/10.48550/arxiv.2408.16286"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.16286","pdf_url":"http://arxiv.org/pdf/2408.16286","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2408.16286","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031877106","display_name":"Toshinori Kitamura","orcid":"https://orcid.org/0000-0002-2326-3140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kitamura, Toshinori","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070075141","display_name":"Tadashi Kozuno","orcid":"https://orcid.org/0000-0002-8820-1362"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kozuno, Tadashi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003820265","display_name":"Wataru Kumagai","orcid":"https://orcid.org/0000-0002-3081-5951"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumagai, Wataru","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075966877","display_name":"K. Hoshino","orcid":"https://orcid.org/0000-0003-0562-2733"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hoshino, Kenta","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009320679","display_name":"Yohei Hosoe","orcid":"https://orcid.org/0000-0002-5659-1060"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hosoe, Yohei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083407612","display_name":"Kazumi Kasaura","orcid":"https://orcid.org/0000-0002-3219-9961"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kasaura, Kazumi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034718334","display_name":"Masashi Hamaya","orcid":"https://orcid.org/0000-0003-4189-8219"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hamaya, Masashi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047914843","display_name":"Paavo Parmas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Parmas, Paavo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5074059447","display_name":"Yutaka Matsuo","orcid":"https://orcid.org/0000-0002-2070-4393"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matsuo, Yutaka","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.1914,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.1914,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10136","display_name":"Statistical Methods and Inference","score":0.1724,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12056","display_name":"Markov Chains and Monte Carlo Methods","score":0.1674,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/epigraph","display_name":"Epigraph","score":0.97051466},{"id":"https://openalex.org/keywords/identification","display_name":"Identification","score":0.596421}],"concepts":[{"id":"https://openalex.org/C17192189","wikidata":"https://www.wikidata.org/wiki/Q1347059","display_name":"Epigraph","level":2,"score":0.97051466},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.63460875},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.596421},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5064839},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.49679044},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.47096092},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.3986682},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.38644028},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.35187593},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34503868},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.32890046},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.21893558},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1575216},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.16286","pdf_url":"http://arxiv.org/pdf/2408.16286","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.16286","pdf_url":"http://arxiv.org/pdf/2408.16286","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4255368532","https://openalex.org/W2413828414","https://openalex.org/W2392366684","https://openalex.org/W2379651310","https://openalex.org/W2367222340","https://openalex.org/W2355558294","https://openalex.org/W2162286586","https://openalex.org/W2113019827","https://openalex.org/W187740018","https://openalex.org/W1541249122"],"abstract_inverted_index":{"Designing":[0],"a":[1,35,39,59,84,118,135,140],"safe":[2],"policy":[3,37,47,73,141,151,157],"for":[4],"uncertain":[5],"environments":[6],"is":[7],"crucial":[8],"in":[9,38,54,79,152],"real-world":[10],"control":[11],"applications.":[12],"However,":[13],"this":[14],"challenge":[15],"remains":[16],"inadequately":[17],"addressed":[18],"within":[19],"the":[20,29,55,67,90,105,109,114,123,126,130],"Markov":[21],"decision":[22],"process":[23],"(MDP)":[24],"framework.":[25],"This":[26],"paper":[27],"presents":[28],"first":[30,64],"algorithm":[31,138],"capable":[32],"of":[33,61,86,108],"identifying":[34],"near-optimal":[36],"robust":[40],"constrained":[41],"MDP":[42],"(RCMDP),":[43],"where":[44],"an":[45,149,153],"optimal":[46],"minimizes":[48],"cumulative":[49],"cost":[50],"while":[51],"satisfying":[52],"constraints":[53],"worst-case":[56],"scenario":[57],"across":[58],"set":[60],"environments.":[62],"We":[63],"prove":[65,145],"that":[66,146],"conventional":[68],"Lagrangian":[69],"max-min":[70],"formulation":[71],"with":[72,139,155],"gradient":[74,120,142],"methods":[75],"can":[76],"become":[77],"trapped":[78],"suboptimal":[80],"solutions":[81],"by":[82,116],"encountering":[83],"sum":[85],"conflicting":[87],"gradients":[88],"from":[89,121],"objective":[91,124],"and":[92,144],"constraint":[93],"functions":[94],"during":[95],"its":[96],"inner":[97],"minimization":[98],"problem.":[99],"To":[100],"address":[101],"this,":[102],"we":[103,133],"leverage":[104],"epigraph":[106,131],"form":[107],"RCMDP":[110,154],"problem,":[111],"which":[112],"resolves":[113],"conflict":[115],"selecting":[117],"single":[119],"either":[122],"or":[125],"constraints.":[127],"Building":[128],"on":[129],"form,":[132],"propose":[134],"binary":[136],"search":[137],"subroutine":[143],"it":[147],"identifies":[148],"$\\varepsilon$-optimal":[150],"$\\tilde{\\mathcal{O}}(\\varepsilon^{-4})$":[156],"evaluations.":[158]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4402952841","counts_by_year":[],"updated_date":"2024-12-15T14:03:12.758868","created_date":"2024-09-29"}