{"id":"https://openalex.org/W4378501141","doi":"https://doi.org/10.48550/arxiv.2305.15930","title":"End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes","display_name":"End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4378501141","doi":"https://doi.org/10.48550/arxiv.2305.15930"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.15930","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2305.15930","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018539132","display_name":"Alexandre Max Maraval","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maraval, Alexandre","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028712584","display_name":"Matthieu Zimmer","orcid":"https://orcid.org/0000-0002-8029-308X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zimmer, Matthieu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012157944","display_name":"Antoine Grosnit","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grosnit, Antoine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5034334762","display_name":"Haitham Bou Ammar","orcid":"https://orcid.org/0000-0002-6083-6171"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ammar, Haitham Bou","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.712479,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":77,"max":82},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9955,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9903,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.42796582}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7590821},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.6805211},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6262896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.54641056},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5196784},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.48676324},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.48427767},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.45204216},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.45042595},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.42796582},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10409209},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.15930","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2305.15930","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2305.15930","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4376155396","https://openalex.org/W4292701710","https://openalex.org/W4281847915","https://openalex.org/W2971351794","https://openalex.org/W2527791220","https://openalex.org/W2174986909","https://openalex.org/W2155070487","https://openalex.org/W2101991911","https://openalex.org/W1947085858","https://openalex.org/W1925875298"],"abstract_inverted_index":{"Meta-Bayesian":[0],"optimisation":[1,11,176],"(meta-BO)":[2],"aims":[3],"to":[4,54,70,94,149],"improve":[5],"the":[6,44,72,112,136,147,183],"sample":[7],"efficiency":[8],"of":[9,34,74,116,146,186],"Bayesian":[10],"by":[12],"leveraging":[13],"data":[14],"from":[15,87],"related":[16],"tasks.":[17],"While":[18],"previous":[19],"methods":[20],"successfully":[21],"meta-learn":[22],"either":[23],"a":[24,107,119,123,151],"surrogate":[25],"model":[26,154],"or":[27],"an":[28,38,140,156],"acquisition":[29,56,76],"function":[30],"independently,":[31],"joint":[32],"training":[33,83],"both":[35],"components":[36],"remains":[37],"open":[39],"challenge.":[40],"This":[41],"paper":[42],"proposes":[43],"first":[45],"end-to-end":[46,64],"differentiable":[47],"meta-BO":[48],"framework":[49,65],"that":[50,82,111,143,161],"generalises":[51],"neural":[52,85],"processes":[53,86],"learn":[55,150],"functions":[57],"via":[58],"transformer":[59],"architectures.":[60],"We":[61,102,159],"enable":[62],"this":[63,104,132],"with":[66,89,106,139],"reinforcement":[67],"learning":[68],"(RL)":[69],"tackle":[71,131],"lack":[73],"labelled":[75],"data.":[77],"Early":[78],"on,":[79],"we":[80,134],"notice":[81],"transformer-based":[84],"scratch":[88],"RL":[90,137],"is":[91],"challenging":[92],"due":[93],"insufficient":[95],"supervision,":[96],"especially":[97],"when":[98],"rewards":[99],"are":[100],"sparse.":[101],"formalise":[103],"claim":[105],"combinatorial":[108],"analysis":[109],"showing":[110],"widely":[113],"used":[114],"notion":[115],"regret":[117,166],"as":[118,155],"reward":[120],"signal":[121],"exhibits":[122],"logarithmic":[124],"sparsity":[125],"pattern":[126],"in":[127,171,182],"trajectory":[128],"lengths.":[129],"To":[130],"problem,":[133],"augment":[135],"objective":[138],"auxiliary":[141],"task":[142],"guides":[144],"part":[145],"architecture":[148],"valid":[152],"probabilistic":[153],"inductive":[157],"bias.":[158],"demonstrate":[160],"our":[162],"method":[163],"achieves":[164],"state-of-the-art":[165],"results":[167],"against":[168],"various":[169],"baselines":[170],"experiments":[172],"on":[173],"standard":[174],"hyperparameter":[175],"tasks":[177],"and":[178,192],"also":[179],"outperforms":[180],"others":[181],"real-world":[184],"problems":[185],"mixed-integer":[187],"programming":[188],"tuning,":[189],"antibody":[190],"design,":[191],"logic":[193],"synthesis":[194],"for":[195],"electronic":[196],"design":[197],"automation.":[198]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4378501141","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-03-21T09:40:47.585967","created_date":"2023-05-27"}