{"id":"https://openalex.org/W4393161074","doi":"https://doi.org/10.1609/aaai.v38i19.30101","title":"Solving Non-rectangular Reward-Robust MDPs via Frequency Regularization","display_name":"Solving Non-rectangular Reward-Robust MDPs via Frequency Regularization","publication_year":2024,"publication_date":"2024-03-24","ids":{"openalex":"https://openalex.org/W4393161074","doi":"https://doi.org/10.1609/aaai.v38i19.30101"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i19.30101","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/30101/31942","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/30101/31942","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092145987","display_name":"Uri Gadot","orcid":null},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Uri Gadot","raw_affiliation_strings":["Technion - Israel Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Technion - Israel Institute of Technology","institution_ids":["https://openalex.org/I174306211"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009378309","display_name":"Esther Derman","orcid":null},"institutions":[{"id":"https://openalex.org/I4210155582","display_name":"Centre Universitaire de Mila","ror":"https://ror.org/05s3cw058","country_code":"DZ","type":"education","lineage":["https://openalex.org/I4210155582"]},{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA","DZ"],"is_corresponding":false,"raw_author_name":"Esther Derman","raw_affiliation_strings":["MILA, Universit\u00e9 de Montr\u00e9al"],"affiliations":[{"raw_affiliation_string":"MILA, Universit\u00e9 de Montr\u00e9al","institution_ids":["https://openalex.org/I4210155582","https://openalex.org/I70931966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107933760","display_name":"Navdeep Kumar","orcid":null},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Navdeep Kumar","raw_affiliation_strings":["Technion - Israel Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Technion - Israel Institute of Technology","institution_ids":["https://openalex.org/I174306211"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092767720","display_name":"Maxence Mohamed Elfatihi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127572","display_name":"IMT Atlantique","ror":"https://ror.org/030hj3061","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210127572"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Maxence Mohamed Elfatihi","raw_affiliation_strings":["IMT Atlantique"],"affiliations":[{"raw_affiliation_string":"IMT Atlantique","institution_ids":["https://openalex.org/I4210127572"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022424856","display_name":"Kfir Y. Levy","orcid":"https://orcid.org/0000-0003-1236-2626"},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Kfir Levy","raw_affiliation_strings":["Technion - Israel Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Technion - Israel Institute of Technology","institution_ids":["https://openalex.org/I174306211"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036260775","display_name":"Shie Mannor","orcid":"https://orcid.org/0000-0003-4439-7647"},"institutions":[{"id":"https://openalex.org/I174306211","display_name":"Technion \u2013 Israel Institute of Technology","ror":"https://ror.org/03qryx823","country_code":"IL","type":"education","lineage":["https://openalex.org/I174306211"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Shie Mannor","raw_affiliation_strings":["Technion - Israel Institute of Technology\nNVIDIA Research"],"affiliations":[{"raw_affiliation_string":"Technion - Israel Institute of Technology\nNVIDIA Research","institution_ids":["https://openalex.org/I174306211"]}]}],"institution_assertions":[],"countries_distinct_count":4,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.278,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":1,"citation_normalized_percentile":{"value":0.993789,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":84,"max":92},"biblio":{"volume":"38","issue":"19","first_page":"21090","last_page":"21098"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.936,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.936,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11749","display_name":"Iterative Learning Control Systems","score":0.9346,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9336,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization","score":0.59549254}],"concepts":[{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.59549254},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4239687},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.42394823},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3644307},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.36117},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.22451073}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i19.30101","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/30101/31942","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2309.01107","pdf_url":"https://arxiv.org/pdf/2309.01107","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i19.30101","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/30101/31942","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.72,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":37,"referenced_works":["https://openalex.org/W1600437712","https://openalex.org/W1965878388","https://openalex.org/W2107086007","https://openalex.org/W2155027007","https://openalex.org/W2155153696","https://openalex.org/W2158782408","https://openalex.org/W2159951376","https://openalex.org/W2165428239","https://openalex.org/W2165622730","https://openalex.org/W2168565265","https://openalex.org/W2522885171","https://openalex.org/W2736601468","https://openalex.org/W2803547033","https://openalex.org/W2914920107","https://openalex.org/W2981021427","https://openalex.org/W2998116579","https://openalex.org/W3005360685","https://openalex.org/W3035388736","https://openalex.org/W3036165382","https://openalex.org/W3124876810","https://openalex.org/W3206893245","https://openalex.org/W4214717370","https://openalex.org/W4221157223","https://openalex.org/W4224230876","https://openalex.org/W4225948724","https://openalex.org/W4226165083","https://openalex.org/W4244777963","https://openalex.org/W4280543789","https://openalex.org/W4281764608","https://openalex.org/W4287273849","https://openalex.org/W4296933623","https://openalex.org/W4298023569","https://openalex.org/W4299662360","https://openalex.org/W4312091648","https://openalex.org/W4318907328","https://openalex.org/W4386501408","https://openalex.org/W4394666657"],"related_works":["https://openalex.org/W4238204885","https://openalex.org/W3002753104","https://openalex.org/W2748952813","https://openalex.org/W2600246793","https://openalex.org/W2142036596","https://openalex.org/W2077600819","https://openalex.org/W2072657027","https://openalex.org/W2061531152","https://openalex.org/W2007980826","https://openalex.org/W1979597421"],"abstract_inverted_index":{"In":[0,81],"robust":[1],"Markov":[2],"decision":[3],"processes":[4],"(RMDPs),":[5],"it":[6,69],"is":[7,49,60,93],"assumed":[8],"that":[9,32],"the":[10,13,27,46,90,96,136],"reward":[11,87,97],"and":[12,74,118,129,140],"transition":[14,91],"dynamics":[15],"lie":[16],"in":[17],"a":[18,67,71,104,109,126],"given":[19],"uncertainty":[20,47],"set.":[21],"By":[22],"targeting":[23],"maximal":[24],"return":[25],"under":[26],"most":[28],"adversarial":[29],"model":[30],"from":[31,103],"set,":[33],"RMDPs":[34,88],"address":[35],"performance":[36],"sensitivity":[37],"to":[38,42,77,147],"misspecified":[39],"environments.":[40],"Yet,":[41],"preserve":[43],"computational":[44,64],"tractability,":[45],"set":[48],"traditionally":[50],"independently":[51],"structured":[52],"for":[53],"each":[54],"state.":[55],"This":[56],"so-called":[57],"rectangularity":[58],"condition":[59],"solely":[61],"motivated":[62],"by":[63],"concerns.":[65],"As":[66],"result,":[68],"lacks":[70],"practical":[72],"incentive":[73],"may":[75],"lead":[76],"overly":[78],"conservative":[79,143],"behavior.":[80],"this":[82,113],"work,":[83],"we":[84],"study":[85],"coupled":[86],"where":[89],"kernel":[92],"fixed,":[94],"but":[95],"function":[98],"lies":[99],"within":[100],"an":[101],"alpha-radius":[102],"nominal":[105],"one.":[106],"We":[107,124],"draw":[108],"direct":[110],"connection":[111],"between":[112],"type":[114],"of":[115],"non-rectangular":[116],"reward-RMDPs":[117],"applying":[119],"policy":[120],"visitation":[121],"frequency":[122],"regularization.":[123],"introduce":[125],"policy-gradient":[127],"method,":[128],"prove":[130],"its":[131,141],"convergence.":[132],"Numerical":[133],"experiments":[134],"illustrate":[135],"learned":[137],"policy's":[138],"robustness":[139],"less":[142],"behavior":[144],"when":[145],"compared":[146],"rectangular":[148],"uncertainty.":[149]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4393161074","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-01-04T12:39:01.240866","created_date":"2024-03-26"}