{"id":"https://openalex.org/W4404405868","doi":"https://doi.org/10.48550/arxiv.2411.07591","title":"Overcoming the Curse of Dimensionality in Reinforcement Learning Through\n Approximate Factorization","display_name":"Overcoming the Curse of Dimensionality in Reinforcement Learning Through\n Approximate Factorization","publication_year":2024,"publication_date":"2024-11-12","ids":{"openalex":"https://openalex.org/W4404405868","doi":"https://doi.org/10.48550/arxiv.2411.07591"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.07591","pdf_url":"http://arxiv.org/pdf/2411.07591","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2411.07591","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054906837","display_name":"Chenbei Lu","orcid":"https://orcid.org/0000-0002-7715-4927"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Chenbei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075795654","display_name":"Laixi Shi","orcid":"https://orcid.org/0000-0003-4038-8620"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Laixi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058269077","display_name":"Zaiwei Chen","orcid":"https://orcid.org/0000-0001-9915-5595"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zaiwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014416228","display_name":"Chenye Wu","orcid":"https://orcid.org/0000-0002-5730-916X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Chenye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5062565732","display_name":"Adam Wierman","orcid":"https://orcid.org/0000-0002-5923-0199"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wierman, Adam","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5499,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5499,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.905175},{"id":"https://openalex.org/C2780273121","wikidata":"https://www.wikidata.org/wiki/Q109411","display_name":"Curse","level":2,"score":0.79140747},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7219486},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.616275},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.51873213},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.449618},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41646728},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.13437161},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.10235691},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.07591","pdf_url":"http://arxiv.org/pdf/2411.07591","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.07591","pdf_url":"http://arxiv.org/pdf/2411.07591","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386146136","https://openalex.org/W4251206704","https://openalex.org/W3206079743","https://openalex.org/W3088621770","https://openalex.org/W2561617217","https://openalex.org/W2528559906","https://openalex.org/W2039478586","https://openalex.org/W2005044196","https://openalex.org/W1561349031","https://openalex.org/W1485007142"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"algorithms":[3,108],"are":[4],"known":[5],"to":[6,15,23,32],"suffer":[7],"from":[8],"the":[9,16,50,70,82,89,103,116,140,144,146,152,155,165],"curse":[10,51,83],"of":[11,52,73,84,105,121,143,148,154,167],"dimensionality,":[12,53],"which":[13],"refers":[14],"fact":[17],"that":[18,56],"large-scale":[19],"problems":[20,59],"often":[21],"lead":[22],"exponentially":[24,160],"high":[25],"sample":[26,71,127,149],"complexity.":[27],"A":[28],"common":[29],"solution":[30],"is":[31],"use":[33],"deep":[34],"neural":[35],"networks":[36],"for":[37,130],"function":[38],"approximation;":[39],"however,":[40],"such":[41],"approaches":[42],"typically":[43],"lack":[44],"theoretical":[45],"guarantees.":[46],"To":[47],"provably":[48],"address":[49],"we":[54,79,163],"observe":[55],"many":[57],"real-world":[58],"exhibit":[60],"task-specific":[61],"model":[62,137],"structures":[63],"that,":[64],"when":[65],"properly":[66],"leveraged,":[67],"can":[68,158],"improve":[69],"efficiency":[72],"RL.":[74],"Building":[75],"on":[76,151,173],"this":[77],"insight,":[78],"propose":[80],"overcoming":[81],"dimensionality":[85],"by":[86,135],"approximately":[87],"factorizing":[88],"original":[90],"Markov":[91],"decision":[92],"processes":[93],"(MDPs)":[94],"into":[95],"smaller,":[96],"independently":[97],"evolving":[98],"MDPs.":[99],"This":[100],"factorization":[101,142],"enables":[102],"development":[104],"sample-efficient":[106],"RL":[107],"in":[109],"both":[110,131,174],"model-based":[111],"and":[112,178],"model-free":[113],"settings,":[114],"with":[115],"latter":[117],"involving":[118],"a":[119,179],"variant":[120],"variance-reduced":[122],"Q-learning.":[123],"We":[124],"provide":[125],"improved":[126],"complexity":[128,150],"guarantees":[129],"proposed":[132,169],"algorithms.":[133],"Notably,":[134],"leveraging":[136],"structure":[138],"through":[139,171],"approximate":[141],"MDP,":[145],"dependence":[147],"size":[153],"state-action":[156],"space":[157],"be":[159],"reduced.":[161],"Numerically,":[162],"demonstrate":[164],"practicality":[166],"our":[168],"methods":[170],"experiments":[172],"synthetic":[175],"MDP":[176],"tasks":[177],"wind":[180],"farm-equipped":[181],"storage":[182],"control":[183],"problem.":[184]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4404405868","counts_by_year":[],"updated_date":"2025-04-04T20:11:58.628399","created_date":"2024-11-16"}