{"id":"https://openalex.org/W4404317933","doi":"https://doi.org/10.48550/arxiv.2410.21081","title":"Stronger Regret Bounds for Safe Online Reinforcement Learning in the\n Linear Quadratic Regulator","display_name":"Stronger Regret Bounds for Safe Online Reinforcement Learning in the\n Linear Quadratic Regulator","publication_year":2024,"publication_date":"2024-10-28","ids":{"openalex":"https://openalex.org/W4404317933","doi":"https://doi.org/10.48550/arxiv.2410.21081"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.21081","pdf_url":"http://arxiv.org/pdf/2410.21081","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.21081","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013165219","display_name":"Benjamin F. Schiffer","orcid":"https://orcid.org/0000-0001-8951-2157"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schiffer, Benjamin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5060531981","display_name":"Lucas Janson","orcid":"https://orcid.org/0000-0001-8641-8181"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Janson, Lucas","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.963,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.963,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9627,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10917","display_name":"Smart Grid Security and Resilience","score":0.9536,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.9227896},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7100936},{"id":"https://openalex.org/C6929976","wikidata":"https://www.wikidata.org/wiki/Q3771881","display_name":"Regulator","level":3,"score":0.61825454},{"id":"https://openalex.org/C98779006","wikidata":"https://www.wikidata.org/wiki/Q2520550","display_name":"Linear-quadratic regulator","level":3,"score":0.59087163},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5066946},{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.46059808},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.44368258},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.41257244},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.40583318},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.37224644},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32129028},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.22919503},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.22633645},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.1566213},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.07714322},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.05363208},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.21081","pdf_url":"http://arxiv.org/pdf/2410.21081","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.21081","pdf_url":"http://arxiv.org/pdf/2410.21081","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4292701710","https://openalex.org/W311622956","https://openalex.org/W3091102817","https://openalex.org/W2731754693","https://openalex.org/W267916353","https://openalex.org/W2326995340","https://openalex.org/W2152693275","https://openalex.org/W1985169903","https://openalex.org/W1925875298","https://openalex.org/W1592622096"],"abstract_inverted_index":{"Many":[0],"practical":[1],"applications":[2],"of":[3,10,70,130,167,183],"online":[4],"reinforcement":[5],"learning":[6,14,28],"require":[7],"the":[8,16,34,38,47,102,114,148,180,191,197],"satisfaction":[9],"safety":[11,173,186],"constraints":[12],"while":[13],"about":[15],"unknown":[17,30],"environment.":[18],"In":[19],"this":[20],"work,":[21],"we":[22,57,87,96,100,123],"study":[23,67],"Linear":[24],"Quadratic":[25],"Regulator":[26],"(LQR)":[27],"with":[29,33,50],"dynamics,":[31],"but":[32],"additional":[35],"constraint":[36],"that":[37,73,171,177],"position":[39],"must":[40],"stay":[41],"within":[42],"a":[43,127],"safe":[44],"region":[45],"for":[46,59,77,118,137,159,179],"entire":[48],"trajectory":[49],"high":[51],"probability.":[52],"Unlike":[53],"in":[54,185,190,196],"previous":[55],"works,":[56],"allow":[58],"both":[60],"bounded":[61],"and":[62,66,92,155],"unbounded":[63],"noise":[64,149,162],"distributions":[65],"stronger":[68],"baselines":[69],"nonlinear":[71],"controllers":[72],"are":[74],"better":[75],"suited":[76],"constrained":[78,119,187],"problems":[79],"than":[80],"linear":[81],"controllers.":[82,132],"Due":[83],"to":[84,107,126],"these":[85],"complications,":[86],"focus":[88],"on":[89],"1-dimensional":[90],"state-":[91],"action-":[93],"spaces,":[94],"however":[95],"also":[97],"discuss":[98],"how":[99],"expect":[101],"high-level":[103],"takeaways":[104],"can":[105],"generalize":[106],"higher":[108],"dimensions.":[109],"Our":[110],"primary":[111],"contribution":[112],"is":[113,145,157,170],"first":[115],"$\\tilde{O}_T(\\sqrt{T})$-regret":[116,144],"bound":[117],"LQR":[120],"learning,":[121],"which":[122],"show":[124],"relative":[125],"specific":[128],"baseline":[129,140],"non-linear":[131,139],"We":[133],"then":[134],"prove":[135],"that,":[136],"any":[138,160],"satisfying":[141],"natural":[142],"assumptions,":[143],"possible":[146,158],"when":[147],"distribution":[150],"has":[151],"sufficiently":[152],"large":[153],"support":[154],"$\\tilde{O}_T(T^{2/3})$-regret":[156],"subgaussian":[161],"distribution.":[163],"An":[164],"overarching":[165],"theme":[166],"our":[168],"results":[169],"enforcing":[172],"provides":[174],"\"free":[175],"exploration\"":[176],"compensates":[178],"added":[181],"cost":[182],"uncertainty":[184],"control,":[188],"resulting":[189],"same":[192],"regret":[193],"rate":[194],"as":[195],"unconstrained":[198],"problem.":[199]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4404317933","counts_by_year":[],"updated_date":"2025-04-23T05:05:58.547220","created_date":"2024-11-14"}