{"id":"https://openalex.org/W4403746549","doi":"https://doi.org/10.48550/arxiv.2409.11986","title":"Data-Efficient Quadratic Q-Learning Using LMIs","display_name":"Data-Efficient Quadratic Q-Learning Using LMIs","publication_year":2024,"publication_date":"2024-09-18","ids":{"openalex":"https://openalex.org/W4403746549","doi":"https://doi.org/10.48550/arxiv.2409.11986"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.11986","pdf_url":"http://arxiv.org/pdf/2409.11986","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2409.11986","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093745181","display_name":"J. S. van Hulst","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"van Hulst, J. S.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063241688","display_name":"W.P.M.H. Heemels","orcid":"https://orcid.org/0000-0003-3440-8007"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heemels, W. P. M. H.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5013274345","display_name":"Duarte Antunes","orcid":"https://orcid.org/0000-0003-3047-9334"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Antunes, D. J.","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.8909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.8658,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.8523,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C129844170","wikidata":"https://www.wikidata.org/wiki/Q41299","display_name":"Quadratic equation","level":2,"score":0.75342417},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43402272},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3594375},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.32196718},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32084352},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.09419489}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.11986","pdf_url":"http://arxiv.org/pdf/2409.11986","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.11986","pdf_url":"http://arxiv.org/pdf/2409.11986","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W3002753104","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2142036596","https://openalex.org/W2077600819","https://openalex.org/W2072657027","https://openalex.org/W2061531152","https://openalex.org/W2007980826","https://openalex.org/W1979597421"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,131],"(RL)":[2],"has":[3],"seen":[4],"significant":[5],"research":[6],"and":[7,43,52,78],"application":[8],"results":[9],"but":[10],"often":[11],"requires":[12],"large":[13],"amounts":[14],"of":[15,64,116],"training":[16],"data.":[17],"This":[18],"paper":[19],"proposes":[20],"two":[21,71],"data-efficient":[22],"off-policy":[23],"RL":[24],"methods":[25,121],"that":[26,99],"use":[27],"parametrized":[28,142],"Q-learning.":[29],"In":[30],"these":[31],"methods,":[32],"the":[33,41,50,62,85],"Q-function":[34,127],"is":[35,67],"chosen":[36],"to":[37],"be":[38],"linear":[39,107],"in":[40,45,49],"parameters":[42],"quadratic":[44],"selected":[46],"basis":[47],"functions":[48],"state":[51],"control":[53],"deviations":[54],"from":[55],"a":[56,96,101],"base":[57],"policy.":[58],"A":[59,133],"cost":[60],"penalizing":[61],"$\\ell_1$-norm":[63],"Bellman":[65],"errors":[66],"minimized.":[68],"We":[69],"propose":[70],"methods:":[72],"Linear":[73],"Matrix":[74],"Inequality":[75],"Q-Learning":[76],"(LMI-QL)":[77],"its":[79],"iterative":[80],"variant":[81],"(LMI-QLi),":[82],"which":[83],"solve":[84],"resulting":[86],"episodic":[87],"optimization":[88,124],"problem":[89,105],"through":[90],"convex":[91,97,123],"optimization.":[92],"LMI-QL":[93],"relies":[94],"on":[95],"relaxation":[98],"yields":[100],"semidefinite":[102],"programming":[103],"(SDP)":[104],"with":[106,125],"matrix":[108],"inequalities":[109],"(LMIs).":[110],"LMI-QLi":[111],"entails":[112],"solving":[113],"sequential":[114],"iterations":[115],"an":[117],"SDP":[118],"problem.":[119],"Both":[120],"combine":[122],"direct":[126],"learning,":[128],"significantly":[129],"improving":[130],"speed.":[132],"numerical":[134],"case":[135],"study":[136],"demonstrates":[137],"their":[138],"advantages":[139],"over":[140],"existing":[141],"Q-learning":[143],"methods.":[144]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403746549","counts_by_year":[],"updated_date":"2024-12-15T17:26:37.514566","created_date":"2024-10-25"}