{"id":"https://openalex.org/W2990376820","doi":"https://doi.org/10.24963/ijcai.2020/368","title":"Self-Attentional Credit Assignment for Transfer in Reinforcement Learning","display_name":"Self-Attentional Credit Assignment for Transfer in Reinforcement Learning","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W2990376820","doi":"https://doi.org/10.24963/ijcai.2020/368","mag":"2990376820"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/368","pdf_url":"https://www.ijcai.org/proceedings/2020/0368.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"preprint","type_crossref":"proceedings-article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.ijcai.org/proceedings/2020/0368.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087706654","display_name":"Johan Ferret","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210113520","display_name":"Brain (Germany)","ror":"https://ror.org/01gamcy45","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210113520"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Johan Ferret","raw_affiliation_strings":["Google Research, Brain Team"],"affiliations":[{"raw_affiliation_string":"Google Research, Brain Team","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210113520"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040382220","display_name":"Rapha\u00ebl Marinier","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113520","display_name":"Brain (Germany)","ror":"https://ror.org/01gamcy45","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210113520"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Raphael Marinier","raw_affiliation_strings":["Google Research (Brain Team)"],"affiliations":[{"raw_affiliation_string":"Google Research (Brain Team)","institution_ids":["https://openalex.org/I4210113520","https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110482875","display_name":"Matthieu Geist","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113520","display_name":"Brain (Germany)","ror":"https://ror.org/01gamcy45","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210113520"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Matthieu Geist","raw_affiliation_strings":["Google Research (Brain Team)"],"affiliations":[{"raw_affiliation_string":"Google Research (Brain Team)","institution_ids":["https://openalex.org/I4210113520","https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065100569","display_name":"Olivier Pietquin","orcid":"https://orcid.org/0000-0002-5386-465X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210113520","display_name":"Brain (Germany)","ror":"https://ror.org/01gamcy45","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210113520"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Olivier Pietquin","raw_affiliation_strings":["Google Research (Brain Team)"],"affiliations":[{"raw_affiliation_string":"Google Research (Brain Team)","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210113520"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":13,"citation_normalized_percentile":{"value":0.999942,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":88,"max":89},"biblio":{"volume":null,"issue":null,"first_page":"2655","last_page":"2661"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9955,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.982,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generality","display_name":"Generality","score":0.8299377},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.69684243}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8583947},{"id":"https://openalex.org/C2780767217","wikidata":"https://www.wikidata.org/wiki/Q5532421","display_name":"Generality","level":2,"score":0.8299377},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8110225},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.69684243},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5897651},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5389427},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.51425403},{"id":"https://openalex.org/C2776175482","wikidata":"https://www.wikidata.org/wiki/Q1195816","display_name":"Transfer (computing)","level":2,"score":0.5099131},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.47981787},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45603666},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.45342588},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.080815405},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.06390932},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C542102704","wikidata":"https://www.wikidata.org/wiki/Q183257","display_name":"Psychotherapist","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":7,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/368","pdf_url":"https://www.ijcai.org/proceedings/2020/0368.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://hal.inria.fr/hal-03159832","pdf_url":"https://inria.hal.science/hal-03159832/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1907.08027","pdf_url":"https://arxiv.org/pdf/1907.08027","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://inria.hal.science/hal-03159832/file/Credit_Alignment_HAL%20%281%29.pdf","pdf_url":"https://inria.hal.science/hal-03159832/file/Credit_Alignment_HAL%20%281%29.pdf","source":null,"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://hal.inria.fr/hal-03159832/file/Credit_Alignment_HAL%20%281%29.pdf","pdf_url":"https://hal.inria.fr/hal-03159832/file/Credit_Alignment_HAL%20%281%29.pdf","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://hal.inria.fr/hal-03159832/document","pdf_url":"https://hal.inria.fr/hal-03159832/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":["Centre National de la Recherche Scientifique"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.1907.08027","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2020/368","pdf_url":"https://www.ijcai.org/proceedings/2020/0368.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":["https://openalex.org/W2990376820","https://openalex.org/W3100915663"],"referenced_works_count":60,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W1777239053","https://openalex.org/W2095705004","https://openalex.org/W2097381042","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2141559645","https://openalex.org/W2145339207","https://openalex.org/W2334782222","https://openalex.org/W2426267443","https://openalex.org/W2550182557","https://openalex.org/W2560647685","https://openalex.org/W2584377191","https://openalex.org/W2597655663","https://openalex.org/W2604763608","https://openalex.org/W2735995851","https://openalex.org/W2736601468","https://openalex.org/W2739083961","https://openalex.org/W2787387965","https://openalex.org/W2787501667","https://openalex.org/W2789517807","https://openalex.org/W2790924949","https://openalex.org/W2809013025","https://openalex.org/W2891076394","https://openalex.org/W2891369367","https://openalex.org/W2895453875","https://openalex.org/W2896457183","https://openalex.org/W2898436992","https://openalex.org/W2913350117","https://openalex.org/W2914261249","https://openalex.org/W2917052767","https://openalex.org/W2940744433","https://openalex.org/W2951032747","https://openalex.org/W2962739339","https://openalex.org/W2962858248","https://openalex.org/W2963026768","https://openalex.org/W2963085895","https://openalex.org/W2963088995","https://openalex.org/W2963184621","https://openalex.org/W2963199420","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2964059481","https://openalex.org/W2964097858","https://openalex.org/W2964185768","https://openalex.org/W2964227899","https://openalex.org/W2964308564","https://openalex.org/W2970705602","https://openalex.org/W2981413347","https://openalex.org/W2990032740","https://openalex.org/W4254755460","https://openalex.org/W4298174377","https://openalex.org/W4298845604","https://openalex.org/W4300395467","https://openalex.org/W4300971732","https://openalex.org/W4319988532","https://openalex.org/W4385245566","https://openalex.org/W4394652010","https://openalex.org/W567721252","https://openalex.org/W81137444"],"related_works":["https://openalex.org/W4381094582","https://openalex.org/W2369625323","https://openalex.org/W2364579609","https://openalex.org/W2353528968","https://openalex.org/W2201908702","https://openalex.org/W2045049461","https://openalex.org/W2032776242","https://openalex.org/W1978893398","https://openalex.org/W1977906818","https://openalex.org/W1522139108"],"abstract_inverted_index":{"The":[0],"ability":[1,48],"to":[2,5,49,62,75,96,101],"transfer":[3,22,76,122],"knowledge":[4],"novel":[6,73],"environments":[7],"and":[8,29,110,131],"tasks":[9,57],"is":[10,25,70],"a":[11,39,72,82,89,105],"sensible":[12],"desiderata":[13],"for":[14,78],"general":[15],"learning":[16,77],"agents.":[17],"Despite":[18],"the":[19,47,56,113,128],"apparent":[20],"promises,":[21],"in":[23,55],"RL":[24,64,79,140],"still":[26],"an":[27],"open":[28],"little":[30],"exploited":[31],"research":[32],"area.":[33],"In":[34],"this":[35],"paper,":[36],"we":[37,44],"take":[38],"brand-new":[40],"perspective":[41],"about":[42],"transfer:":[43],"suggest":[45],"that":[46,58,80,124],"assign":[50,102],"credit":[51,84,103],"unveils":[52],"structural":[53],"invariants":[54],"can":[59,118,133],"be":[60,119,134],"transferred":[61],"make":[63],"more":[65],"sample-efficient.":[66],"Our":[67],"main":[68],"contribution":[69],"SECRET,":[71],"approach":[74],"uses":[81],"backward-view":[83],"assignment":[85],"mechanism":[86],"based":[87],"on":[88,136],"self-attentive":[90],"architecture.":[91],"Two":[92],"aspects":[93],"are":[94],"key":[95],"its":[97],"generality:":[98],"it":[99,117,132],"learns":[100],"as":[104],"separate":[106],"offline":[107],"supervised":[108],"process":[109],"exclusively":[111],"modifies":[112],"reward":[114,129],"function.":[115],"Consequently,":[116],"supplemented":[120],"by":[121],"methods":[123],"do":[125],"not":[126],"modify":[127],"function":[130],"plugged":[135],"top":[137],"of":[138],"any":[139],"algorithm.":[141]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2990376820","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":1}],"updated_date":"2025-01-08T02:55:49.209891","created_date":"2019-12-05"}