{"id":"https://openalex.org/W3110954408","doi":"https://doi.org/10.1109/scc49832.2020.00017","title":"BSE-MAML: Model Agnostic Meta-Reinforcement Learning via Bayesian Structured Exploration","display_name":"BSE-MAML: Model Agnostic Meta-Reinforcement Learning via Bayesian Structured Exploration","publication_year":2020,"publication_date":"2020-11-01","ids":{"openalex":"https://openalex.org/W3110954408","doi":"https://doi.org/10.1109/scc49832.2020.00017","mag":"3110954408"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/scc49832.2020.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100342599","display_name":"Haonan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"funder","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haonan Wang","raw_affiliation_strings":["National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101673014","display_name":"Yiyun Zhang","orcid":"https://orcid.org/0000-0002-0848-6450"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"funder","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiyun Zhang","raw_affiliation_strings":["National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039795290","display_name":"Dawei Feng","orcid":"https://orcid.org/0000-0002-7587-8905"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"funder","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Feng","raw_affiliation_strings":["National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100440903","display_name":"Dongsheng Li","orcid":"https://orcid.org/0000-0001-9743-2034"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"funder","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongsheng Li","raw_affiliation_strings":["National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051363049","display_name":"Feng Huang","orcid":"https://orcid.org/0000-0002-0740-9373"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"funder","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Huang","raw_affiliation_strings":["National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National Lab for Parallel and Distributed Processing, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.151,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.386203,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":73,"max":76},"biblio":{"volume":null,"issue":null,"first_page":"60","last_page":"67"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9921,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9775,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.83422685},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7544658},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.5833709},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5129278},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.49021512},{"id":"https://openalex.org/C33724603","wikidata":"https://www.wikidata.org/wiki/Q812540","display_name":"Bayesian network","level":2,"score":0.42131534},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41221195},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.111754656},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.054311365},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/scc49832.2020.00017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":null}],"datasets":[],"versions":[],"referenced_works_count":42,"referenced_works":["https://openalex.org/W2121863487","https://openalex.org/W2124352385","https://openalex.org/W2158782408","https://openalex.org/W2161966552","https://openalex.org/W2173248099","https://openalex.org/W2265908272","https://openalex.org/W2280163991","https://openalex.org/W2297111485","https://openalex.org/W2342662072","https://openalex.org/W2489939061","https://openalex.org/W2511837229","https://openalex.org/W2550182557","https://openalex.org/W2578206533","https://openalex.org/W2596982695","https://openalex.org/W2604763608","https://openalex.org/W2606433045","https://openalex.org/W2606757878","https://openalex.org/W2784596339","https://openalex.org/W2785342287","https://openalex.org/W2787933113","https://openalex.org/W2788904251","https://openalex.org/W2805481182","https://openalex.org/W2885550588","https://openalex.org/W2921646839","https://openalex.org/W2923504512","https://openalex.org/W2949442184","https://openalex.org/W2952526277","https://openalex.org/W2954619248","https://openalex.org/W2963523627","https://openalex.org/W2963641140","https://openalex.org/W2963775850","https://openalex.org/W2963864421","https://openalex.org/W2963938771","https://openalex.org/W2963956018","https://openalex.org/W2970745960","https://openalex.org/W3000152304","https://openalex.org/W3009254104","https://openalex.org/W3014189359","https://openalex.org/W4214717370","https://openalex.org/W4285719527","https://openalex.org/W4295676892","https://openalex.org/W4300971732"],"related_works":["https://openalex.org/W98577079","https://openalex.org/W4383109125","https://openalex.org/W4319309271","https://openalex.org/W4306904969","https://openalex.org/W4294873804","https://openalex.org/W3130669838","https://openalex.org/W2891227010","https://openalex.org/W2785397462","https://openalex.org/W2548988175","https://openalex.org/W2091347716"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2,55,136],"(RL)":[3],"is":[4,23],"playing":[5],"an":[6],"increasingly":[7],"important":[8],"role":[9],"in":[10,101,120,122],"web":[11],"services":[12],"such":[13],"as":[14],"news":[15],"recommendation,":[16],"vulnerability":[17],"detection,":[18],"and":[19,93,139],"personalized":[20],"services.":[21],"Exploration":[22],"a":[24,45,78],"key":[25],"component":[26],"of":[27],"RL,":[28],"which":[29],"determines":[30],"whether":[31],"these":[32],"RL-based":[33],"applications":[34],"could":[35,62],"find":[36],"effective":[37],"solutions":[38],"eventually.":[39],"In":[40],"this":[41],"paper,":[42],"we":[43],"propose":[44],"novel":[46,102],"gradient-based":[47],"fast":[48],"adaptation":[49],"approach":[50],"for":[51],"model":[52],"agnostic":[53],"meta-reinforcement":[54],"via":[56,77],"Bayesian":[57,79],"structure":[58],"exploration":[59,65,96,121,137,141],"(BSE-MAML).":[60],"BSE-MAML":[61,116],"effectively":[63],"learn":[64],"strategies":[66,97],"from":[67],"prior":[68],"experience":[69],"by":[70,84],"updating":[71],"policy":[72],"with":[73,125],"embedding":[74],"latent":[75,85],"space":[76,86],"mechanism.":[80],"Coherent":[81],"stochasticity":[82],"injected":[83],"are":[87],"more":[88],"efficient":[89],"than":[90],"random":[91],"noise,":[92],"can":[94],"produce":[95],"to":[98,109,129],"perform":[99],"well":[100],"environment.":[103],"We":[104],"have":[105],"conducted":[106],"extensive":[107],"experiments":[108],"evaluate":[110],"BSE-MAML.":[111],"Experimental":[112],"results":[113],"show":[114],"that":[115],"achieves":[117],"better":[118],"performance":[119],"realistic":[123],"environments":[124],"sparse":[126],"rewards,":[127],"compared":[128],"state-of-the-art":[130],"meta-RL":[131],"algorithms,":[132],"RL":[133],"methods":[134],"without":[135],"strategies,":[138],"task-agnostic":[140],"approaches.":[142]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3110954408","counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":2}],"updated_date":"2025-04-19T03:32:50.947502","created_date":"2020-12-21"}