{"id":"https://openalex.org/W4224301188","doi":"https://doi.org/10.48550/arxiv.2204.09560","title":"Understanding and Preventing Capacity Loss in Reinforcement Learning","display_name":"Understanding and Preventing Capacity Loss in Reinforcement Learning","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4224301188","doi":"https://doi.org/10.48550/arxiv.2204.09560"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2204.09560","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2204.09560","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089486474","display_name":"Clare Lyle","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lyle, Clare","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047099585","display_name":"Mark Rowland","orcid":"https://orcid.org/0000-0002-5567-9562"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rowland, Mark","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5037981481","display_name":"Will Dabney","orcid":"https://orcid.org/0000-0003-4600-5520"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dabney, Will","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.824796,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":88,"max":90},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.92,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.92,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.8146157},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization","score":0.6424484},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.53141576}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.87030447},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.8146157},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7207312},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.6424484},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.60499525},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.54498136},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.53141576},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.46126822},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.45719612},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43247342},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.065484256},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2204.09560","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2204.09560","pdf_url":"http://arxiv.org/pdf/2204.09560","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2204.09560","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2204.09560","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W419730749","https://openalex.org/W3204184292","https://openalex.org/W3176564347","https://openalex.org/W3095877357","https://openalex.org/W3031039437","https://openalex.org/W2355833770","https://openalex.org/W2072565696","https://openalex.org/W2050451745","https://openalex.org/W1985458517","https://openalex.org/W183202219"],"abstract_inverted_index":{"The":[0],"reinforcement":[1],"learning":[2,35,138],"(RL)":[3],"problem":[4,16],"is":[5,77,128],"rife":[6],"with":[7],"sources":[8],"of":[9,21,49,71,103],"non-stationarity,":[10],"making":[11],"it":[12],"a":[13,26,47,69,88,101],"notoriously":[14],"difficult":[15],"domain":[17],"for":[18],"the":[19,137,143],"application":[20],"neural":[22],"networks.":[23],"We":[24,62,85,122],"identify":[25],"mechanism":[27],"by":[28,99],"which":[29],"non-stationary":[30],"prediction":[31],"targets":[32],"can":[33],"prevent":[34],"progress":[36],"in":[37,68,82,115],"deep":[38],"RL":[39,72],"agents:":[40],"\\textit{capacity":[41],"loss},":[42],"whereby":[43],"networks":[44],"trained":[45],"on":[46],"sequence":[48],"target":[50],"values":[51],"lose":[52],"their":[53,58],"ability":[54],"to":[55,80,111,130,133],"quickly":[56],"update":[57],"predictions":[59],"over":[60],"time.":[61],"demonstrate":[63],"that":[64,95,124],"capacity":[65,126],"loss":[66,127],"occurs":[67],"range":[70],"agents":[73,132],"and":[74,76],"environments,":[75],"particularly":[78],"damaging":[79],"performance":[81,113],"sparse-reward":[83,116],"tasks.":[84],"then":[86],"present":[87],"simple":[89],"regularizer,":[90],"Initial":[91],"Feature":[92],"Regularization":[93],"(InFeR),":[94],"mitigates":[96],"this":[97],"phenomenon":[98],"regressing":[100],"subspace":[102],"features":[104],"towards":[105],"its":[106],"value":[107],"at":[108],"initialization,":[109],"leading":[110],"significant":[112],"improvements":[114],"environments":[117],"such":[118],"as":[119],"Montezuma's":[120],"Revenge.":[121],"conclude":[123],"preventing":[125],"crucial":[129],"enable":[131],"maximally":[134],"benefit":[135],"from":[136],"signals":[139],"they":[140],"obtain":[141],"throughout":[142],"entire":[144],"training":[145],"trajectory.":[146]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4224301188","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":7}],"updated_date":"2025-04-29T15:05:58.412085","created_date":"2022-04-26"}