{"id":"https://openalex.org/W3170823761","doi":"https://doi.org/10.1609/aaai.v36i7.20674","title":"Same State, Different Task: Continual Reinforcement Learning without Interference","display_name":"Same State, Different Task: Continual Reinforcement Learning without Interference","publication_year":2022,"publication_date":"2022-06-28","ids":{"openalex":"https://openalex.org/W3170823761","doi":"https://doi.org/10.1609/aaai.v36i7.20674","mag":"3170823761"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v36i7.20674","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20674/20433","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20674/20433","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056415825","display_name":"Samuel Kessler","orcid":"https://orcid.org/0009-0007-4940-8575"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Samuel Kessler","raw_affiliation_strings":["University of Oxford"],"affiliations":[{"raw_affiliation_string":"University of Oxford","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083828420","display_name":"Jack Parker-Holder","orcid":null},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jack Parker-Holder","raw_affiliation_strings":["University of Oxford"],"affiliations":[{"raw_affiliation_string":"University of Oxford","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008365801","display_name":"Philip Ball","orcid":"https://orcid.org/0000-0001-5896-6447"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Philip Ball","raw_affiliation_strings":["University of Oxford"],"affiliations":[{"raw_affiliation_string":"University of Oxford","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090331439","display_name":"Stefan Zohren","orcid":"https://orcid.org/0000-0002-3392-0394"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Stefan Zohren","raw_affiliation_strings":["University of Oxford"],"affiliations":[{"raw_affiliation_string":"University of Oxford","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058617210","display_name":"Stephen Roberts","orcid":"https://orcid.org/0000-0002-9305-9268"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Stephen J. Roberts","raw_affiliation_strings":["University of Oxford"],"affiliations":[{"raw_affiliation_string":"University of Oxford","institution_ids":["https://openalex.org/I40120149"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.285,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.658299,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":"36","issue":"7","first_page":"7143","last_page":"7151"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9938,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9936,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44393328}],"concepts":[{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.9435849},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.86660045},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8249416},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.76149297},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.637527},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.57649565},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48300326},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.47500384},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4646532},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44393328},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07131216},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.070470065},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v36i7.20674","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20674/20433","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.02940","pdf_url":"http://arxiv.org/pdf/2106.02940","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v36i7.20674","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/20674/20433","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions","score":0.59}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":93,"referenced_works":["https://openalex.org/W1515851193","https://openalex.org/W1522301498","https://openalex.org/W1560021816","https://openalex.org/W1570963478","https://openalex.org/W1663973292","https://openalex.org/W1821462560","https://openalex.org/W1959608418","https://openalex.org/W2076337359","https://openalex.org/W2097998348","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2155968351","https://openalex.org/W2257979135","https://openalex.org/W2426267443","https://openalex.org/W2440926996","https://openalex.org/W2473930607","https://openalex.org/W2560647685","https://openalex.org/W2737492962","https://openalex.org/W2746553466","https://openalex.org/W2751777443","https://openalex.org/W2754517384","https://openalex.org/W2761873684","https://openalex.org/W2765101016","https://openalex.org/W2781726626","https://openalex.org/W2786036274","https://openalex.org/W2786446225","https://openalex.org/W2789525339","https://openalex.org/W2804175194","https://openalex.org/W2804746922","https://openalex.org/W2902456977","https://openalex.org/W2904246096","https://openalex.org/W2939137134","https://openalex.org/W2947461406","https://openalex.org/W2947843732","https://openalex.org/W2949819354","https://openalex.org/W2954360742","https://openalex.org/W2962724315","https://openalex.org/W2962902376","https://openalex.org/W2963092607","https://openalex.org/W2963120839","https://openalex.org/W2963238274","https://openalex.org/W2963390791","https://openalex.org/W2963559848","https://openalex.org/W2963577698","https://openalex.org/W2963703618","https://openalex.org/W2963850662","https://openalex.org/W2963960193","https://openalex.org/W2964048876","https://openalex.org/W2964059111","https://openalex.org/W2964121744","https://openalex.org/W2964227312","https://openalex.org/W2964291307","https://openalex.org/W2967925425","https://openalex.org/W2970066656","https://openalex.org/W2970586779","https://openalex.org/W2970748152","https://openalex.org/W2978644431","https://openalex.org/W2990747716","https://openalex.org/W2995040055","https://openalex.org/W2995795252","https://openalex.org/W2997646596","https://openalex.org/W3008658065","https://openalex.org/W3034973310","https://openalex.org/W3040863728","https://openalex.org/W3042725540","https://openalex.org/W3092475441","https://openalex.org/W3099351824","https://openalex.org/W3100678069","https://openalex.org/W3103780890","https://openalex.org/W3104090274","https://openalex.org/W3105010138","https://openalex.org/W3106479218","https://openalex.org/W3106539628","https://openalex.org/W3107153805","https://openalex.org/W3116551962","https://openalex.org/W3117215073","https://openalex.org/W3134456772","https://openalex.org/W3173061714","https://openalex.org/W4212863985","https://openalex.org/W4254755460","https://openalex.org/W4287548909","https://openalex.org/W4295113716","https://openalex.org/W4295883599","https://openalex.org/W4297573784","https://openalex.org/W4297797010","https://openalex.org/W4298116016","https://openalex.org/W4298206671","https://openalex.org/W4300525946","https://openalex.org/W4301163820","https://openalex.org/W4319988532","https://openalex.org/W4394666657","https://openalex.org/W567721252","https://openalex.org/W582134693"],"related_works":["https://openalex.org/W4310285384","https://openalex.org/W4289718052","https://openalex.org/W4287549553","https://openalex.org/W3183027292","https://openalex.org/W3116498279","https://openalex.org/W2974871044","https://openalex.org/W2794885965","https://openalex.org/W2164121020","https://openalex.org/W2145559838","https://openalex.org/W2104218666"],"abstract_inverted_index":{"Continual":[0],"Learning":[1],"(CL)":[2],"considers":[3],"the":[4,95,108,130,193,203,211],"problem":[5,109],"of":[6,14,52,110,132,207],"training":[7,251],"an":[8,85,197,225],"agent":[9,86,213],"sequentially":[10],"on":[11,21,37,119,160],"a":[12,38,46,50,74,137,147,161,182],"set":[13],"tasks":[15,61],"while":[16,240],"seeking":[17],"to":[18,55,141,171,191,214,244,247],"retain":[19],"performance":[20,36,249],"all":[22],"previous":[23],"tasks.":[24],"A":[25],"key":[26],"challenge":[27],"in":[28,58,80,129,167,229],"CL":[29,116,237],"is":[30,42,189,242],"catastrophic":[31],"forgetting,":[32,57],"which":[33],"arises":[34],"when":[35,44,84,250],"previously":[39],"mastered":[40],"task":[41,199],"reduced":[43],"learning":[45,82],"new":[47,162],"task.":[48,163],"While":[49],"variety":[51],"methods":[53,117,238],"exist":[54],"combat":[56],"some":[59],"cases":[60],"are":[62,169],"fundamentally":[63],"incompatible":[64],"with":[65,124],"each":[66,158],"other":[67],"and":[68,186],"thus":[69],"cannot":[70],"be":[71,88],"learnt":[72,219],"by":[73],"single":[75,120],"policy.":[76],"This":[77],"can":[78],"occur,":[79],"reinforcement":[81],"(RL)":[83],"may":[87],"rewarded":[89],"for":[90,196],"achieving":[91],"different":[92,217,222],"goals":[93],"from":[94,107,202],"same":[96],"observation.":[97],"In":[98],"this":[99,103,143],"paper":[100],"we":[101,135,177],"formalize":[102],"\"interference\"":[104],"as":[105,181],"distinct":[106],"forgetting.":[111],"We":[112,227],"show":[113,187,228],"that":[114,233],"existing":[115,234],"based":[118,236],"neural":[121],"network":[122],"predictors":[123],"shared":[125,151],"replay":[126,235],"buffers":[127],"fail":[128],"presence":[131],"interference.":[133,173],"Instead,":[134],"propose":[136],"simple":[138],"method,":[139],"OWL,":[140],"address":[142],"challenge.":[144],"OWL":[145,168,212,241],"learns":[146],"factorized":[148],"policy,":[149],"using":[150,200],"feature":[152],"extraction":[153],"layers,":[154],"but":[155],"separate":[156,165],"heads,":[157],"specializing":[159],"The":[164,205],"heads":[166],"used":[170],"prevent":[172],"At":[174],"test":[175],"time,":[176],"formulate":[178],"policy":[179,195],"selection":[180],"multi-armed":[183],"bandit":[184,208],"problem,":[185],"it":[188],"possible":[190],"select":[192],"best":[194],"unknown":[198],"feedback":[201],"environment.":[204],"use":[206],"algorithms":[209],"allows":[210],"constructively":[215],"re-use":[216],"continually":[218],"policies":[220],"at":[221],"times":[223],"during":[224],"episode.":[226],"multiple":[230],"RL":[231],"environments":[232],"fail,":[239],"able":[243],"achieve":[245],"close":[246],"optimal":[248],"sequentially.":[252]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3170823761","counts_by_year":[{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2024-12-08T17:45:36.978921","created_date":"2021-06-22"}