{"id":"https://openalex.org/W4393160929","doi":"https://doi.org/10.1609/aaai.v38i19.30094","title":"P2BPO: Permeable Penalty Barrier-Based Policy Optimization for Safe RL","display_name":"P2BPO: Permeable Penalty Barrier-Based Policy Optimization for Safe RL","publication_year":2024,"publication_date":"2024-03-24","ids":{"openalex":"https://openalex.org/W4393160929","doi":"https://doi.org/10.1609/aaai.v38i19.30094"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i19.30094","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/30094/31928","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/30094/31928","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065814658","display_name":"Sumanta Dey","orcid":"https://orcid.org/0000-0002-6004-9100"},"institutions":[{"id":"https://openalex.org/I145894827","display_name":"Indian Institute of Technology Kharagpur","ror":"https://ror.org/03w5sq511","country_code":"IN","type":"education","lineage":["https://openalex.org/I145894827"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sumanta Dey","raw_affiliation_strings":["Indian Institute of Technology Kharagpur"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Kharagpur","institution_ids":["https://openalex.org/I145894827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033329960","display_name":"Pallab Dasgupta","orcid":"https://orcid.org/0000-0002-2178-8154"},"institutions":[{"id":"https://openalex.org/I1335490905","display_name":"Synopsys (Switzerland)","ror":"https://ror.org/03mb54f81","country_code":"CH","type":"company","lineage":["https://openalex.org/I1335490905","https://openalex.org/I4210088951"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Pallab Dasgupta","raw_affiliation_strings":["Synopsys"],"affiliations":[{"raw_affiliation_string":"Synopsys","institution_ids":["https://openalex.org/I1335490905"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085224640","display_name":"Soumyajit Dey","orcid":"https://orcid.org/0000-0001-9329-6389"},"institutions":[{"id":"https://openalex.org/I145894827","display_name":"Indian Institute of Technology Kharagpur","ror":"https://ror.org/03w5sq511","country_code":"IN","type":"education","lineage":["https://openalex.org/I145894827"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Soumyajit Dey","raw_affiliation_strings":["Indian Institute of Technology Kharagpur"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Kharagpur","institution_ids":["https://openalex.org/I145894827"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":83},"biblio":{"volume":"38","issue":"19","first_page":"21029","last_page":"21036"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9848,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9848,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13295","display_name":"Safety Systems Engineering in Autonomy","score":0.976,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9622,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4586861}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i19.30094","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/30094/31928","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i19.30094","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/30094/31928","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Peace, justice, and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.79}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":26,"referenced_works":["https://openalex.org/W1518931405","https://openalex.org/W1522301498","https://openalex.org/W1575592356","https://openalex.org/W1771410628","https://openalex.org/W1845972764","https://openalex.org/W2128429746","https://openalex.org/W2155027007","https://openalex.org/W2576525097","https://openalex.org/W2736601468","https://openalex.org/W2788014517","https://openalex.org/W2804791273","https://openalex.org/W2902907165","https://openalex.org/W2962734844","https://openalex.org/W2994712737","https://openalex.org/W2998619042","https://openalex.org/W3006225970","https://openalex.org/W3094224934","https://openalex.org/W3162902207","https://openalex.org/W4214717370","https://openalex.org/W4281480521","https://openalex.org/W4287725923","https://openalex.org/W4293545785","https://openalex.org/W4297824337","https://openalex.org/W4298857966","https://openalex.org/W4310486832","https://openalex.org/W4382239144"],"related_works":["https://openalex.org/W4391913857","https://openalex.org/W2748952813","https://openalex.org/W2530322880","https://openalex.org/W2478288626","https://openalex.org/W2390279801","https://openalex.org/W2382290278","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2350741829","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Safe":[0],"Reinforcement":[1],"Learning":[2],"(SRL)":[3],"algorithms":[4,44,183],"aim":[5],"to":[6,31,107,197],"learn":[7,160],"a":[8,93,103,121,128,145,161,172],"policy":[9,58,162],"that":[10,26,87,157,190],"maximizes":[11],"the":[12,16,21,33,72,79,99,135,138,149,164,198],"reward":[13,37],"while":[14,170,195],"satisfying":[15,163],"safety":[17,40,139,165],"constraints.":[18,140,199],"One":[19],"of":[20,36,96,120,148],"challenges":[22],"in":[23],"SRL":[24,182,186],"is":[25,28,105,117,125],"it":[27,191],"often":[29],"difficult":[30],"balance":[32],"two":[34],"objectives":[35],"maximization":[38],"and":[39,53,67,102,131,188],"constraint":[41,46],"satisfaction.":[42],"Existing":[43],"utilize":[45],"optimization":[47],"techniques":[48],"like":[49],"penalty-based,":[50,52],"barrier":[51],"Lagrangian-based":[54],"dual":[55],"or":[56],"primal":[57],"optimizations":[59],"methods.":[60],"However,":[61],"they":[62],"suffer":[63],"from":[64],"training":[65],"oscillations":[66],"approximation":[68],"errors,":[69],"which":[70,124,155],"impact":[71],"overall":[73],"learning":[74],"objectives.":[75],"This":[76],"paper":[77],"proposes":[78],"Permeable":[80],"Penalty":[81],"Barrier-based":[82],"Policy":[83],"Optimization":[84],"(P2BPO)":[85],"algorithm":[86],"addresses":[88],"this":[89,109],"issue":[90],"by":[91],"allowing":[92],"small":[94],"fraction":[95],"penalty":[97,100,115],"beyond":[98],"barrier,":[101],"parameter":[104,116],"used":[106,118],"control":[108],"permeability.":[110],"In":[111],"addition,":[112],"an":[113],"adaptive":[114],"instead":[119],"constant":[122],"one,":[123],"initialized":[126],"with":[127,167,180],"low":[129],"value":[130],"increased":[132],"gradually":[133],"as":[134],"agent":[136],"violates":[137],"We":[141],"have":[142],"also":[143],"provided":[144],"theoretical":[146],"proof":[147],"proposed":[150],"method's":[151],"performance":[152],"guarantee":[153],"bound,":[154],"ensures":[156],"P2BPO":[158,179],"can":[159],"constraints":[166],"high":[168],"probability":[169],"achieving":[171],"higher":[173],"expected":[174],"reward.":[175],"Furthermore,":[176],"we":[177],"compare":[178],"other":[181],"on":[184],"various":[185],"tasks":[187],"demonstrate":[189],"achieves":[192],"better":[193],"rewards":[194],"adhering":[196]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4393160929","counts_by_year":[],"updated_date":"2025-01-07T02:31:56.398974","created_date":"2024-03-26"}