{"id":"https://openalex.org/W4379251623","doi":"https://doi.org/10.48550/arxiv.2306.00035","title":"ROSARL: Reward-Only Safe Reinforcement Learning","display_name":"ROSARL: Reward-Only Safe Reinforcement Learning","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4379251623","doi":"https://doi.org/10.48550/arxiv.2306.00035"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2306.00035","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2306.00035","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017776766","display_name":"Geraud Nangue Tasse","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tasse, Geraud Nangue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062478973","display_name":"Tamlin Love","orcid":"https://orcid.org/0000-0001-6441-3777"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Love, Tamlin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092081206","display_name":"Mark Nemecek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nemecek, Mark","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078861770","display_name":"Steven James","orcid":"https://orcid.org/0000-0003-4366-4125"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"James, Steven","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5068297734","display_name":"Benjamin Rosman","orcid":"https://orcid.org/0000-0002-0284-4114"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rosman, Benjamin","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.710701,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":65,"max":76},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9383,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9383,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/penalty-method","display_name":"Penalty Method","score":0.6247645}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8395188},{"id":"https://openalex.org/C149728462","wikidata":"https://www.wikidata.org/wiki/Q751319","display_name":"Minimax","level":2,"score":0.75476515},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.71666616},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.700483},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6415744},{"id":"https://openalex.org/C6180225","wikidata":"https://www.wikidata.org/wiki/Q3411771","display_name":"Penalty method","level":2,"score":0.6247645},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5810826},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.57883185},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.53558856},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.49460718},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.44595224},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.43429002},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.4166156},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24729943},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17952606},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14467424},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.10662034},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2306.00035","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.00035","pdf_url":"http://arxiv.org/pdf/2306.00035","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2306.00035","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2306.00035","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.7,"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4239246781","https://openalex.org/W4205698120","https://openalex.org/W2561315646","https://openalex.org/W2332386680","https://openalex.org/W2248621902","https://openalex.org/W2164760767","https://openalex.org/W2053378152","https://openalex.org/W2036697162","https://openalex.org/W2003779889","https://openalex.org/W1979367874"],"abstract_inverted_index":{"An":[0],"important":[1],"problem":[2],"in":[3,15,31,75,104,191],"reinforcement":[4],"learning":[5,175],"is":[6,21,47],"designing":[7,76],"agents":[8,57,186],"that":[9,58,142,181,187],"learn":[10,170,188],"to":[11,26,38,56,70,131,169,185],"solve":[12],"tasks":[13],"safely":[14],"an":[16,157,167],"environment.":[17,158],"A":[18],"common":[19],"solution":[20],"for":[22,90,166],"a":[23,29,36,52,65,91,95,161],"human":[24],"expert":[25],"define":[27],"either":[28],"penalty":[30,53,66,173],"the":[32,68,73,84,87,106,118,137,152,176],"reward":[33,77],"function":[34],"or":[35,78],"cost":[37,79],"be":[39,145],"minimised":[40],"when":[41],"reaching":[42,121],"unsafe":[43,60,99,112,123],"states.":[44],"However,":[45],"this":[46,132,171],"non-trivial,":[48],"since":[49],"too":[50,63],"small":[51],"may":[54],"lead":[55],"reach":[59],"states,":[61,100,124],"while":[62,174],"large":[64],"increases":[67],"time":[69],"convergence.":[71],"Additionally,":[72],"difficulty":[74],"functions":[80],"can":[81,144],"increase":[82],"with":[83,94],"complexity":[85],"of":[86,98,109,120,126,156],"problem.":[88],"Hence,":[89],"given":[92,96],"environment":[93],"set":[97],"we":[101],"are":[102],"interested":[103],"finding":[105],"upper":[107,134],"bound":[108,135],"rewards":[110],"at":[111],"states":[113],"whose":[114],"optimal":[115],"policies":[116,190],"minimise":[117],"probability":[119],"those":[122],"irrespective":[125],"task":[127,177],"rewards.":[128],"We":[129,159],"refer":[130],"exact":[133],"as":[136],"\"Minmax":[138],"penalty\",":[139],"and":[140,154,179],"show":[141],"it":[143,183],"obtained":[146],"by":[147],"taking":[148],"into":[149],"account":[150],"both":[151],"controllability":[153],"diameter":[155],"provide":[160],"simple":[162],"practical":[163],"model-free":[164],"algorithm":[165],"agent":[168],"Minmax":[172],"policy,":[178],"demonstrate":[180],"using":[182],"leads":[184],"safe":[189],"high-dimensional":[192],"continuous":[193],"control":[194],"environments.":[195]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4379251623","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-24T02:53:26.378429","created_date":"2023-06-04"}