{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,11]],"date-time":"2024-08-11T00:27:27Z","timestamp":1723336047669},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1109\/icra57147.2024.10610919","type":"proceedings-article","created":{"date-parts":[[2024,8,8]],"date-time":"2024-08-08T17:51:05Z","timestamp":1723139465000},"page":"12942-12948","source":"Crossref","is-referenced-by-count":0,"title":["Merging Decision Transformers: Weight Averaging for Forming Multi-Task Policies"],"prefix":"10.1109","author":[{"given":"Daniel","family":"Lawson","sequence":"first","affiliation":[{"name":"Purdue University,Department of Computer Science,West Lafayette,IN,USA,47907"}]},{"given":"Ahmed H.","family":"Qureshi","sequence":"additional","affiliation":[{"name":"Purdue University,Department of Computer Science,West Lafayette,IN,USA,47907"}]}],"member":"263","reference":[{"journal-title":"Git re-basin: Merging models modulo permutation symmetries","year":"2022","author":"Ainsworth","key":"ref1"},{"journal-title":"Layer normalization","year":"2016","author":"Ba","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/iros55552.2023.10342381"},{"journal-title":"Decision transformer: Reinforcement learning via sequence modeling","year":"2021","author":"Chen","key":"ref4"},{"journal-title":"Fusing finetuned models for better pretraining","year":"2022","author":"Choshen","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-eacl.153"},{"journal-title":"Cold fusion: Collaborative descent for distributed multitask finetuning","year":"2022","author":"Don-Yehiya","key":"ref7"},{"journal-title":"Learning universal policies via text-guided video generation","year":"2023","author":"Du","key":"ref8"},{"journal-title":"The role of permutation invariance in linear mode connectivity of neural networks","year":"2021","author":"Entezari","key":"ref9"},{"journal-title":"D4rl: Datasets for deep data-driven reinforcement learning","year":"2020","author":"Fu","key":"ref10"},{"journal-title":"Generalized decision transformer for offline hindsight information matching","year":"2021","author":"Furuta","key":"ref11"},{"journal-title":"Editing models with task arithmetic","year":"2022","author":"Ilharco","key":"ref12"},{"journal-title":"Patching open-vocabulary models by interpolating weights","year":"2022","author":"Ilharco","key":"ref13"},{"journal-title":"Vima-manipulation","year":"2022","author":"Jiang","key":"ref14"},{"journal-title":"Dataless knowledge fusion by merging weights of language models","year":"2022","author":"Jin","key":"ref15"},{"article-title":"Repair: Renormalizing permuted activations for interpolation repair","year":"2022","author":"Jordan","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1613\/jair.1.13673"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"journal-title":"Offline q-learning on diverse multi-task data both scales and generalizes","year":"2022","author":"Kumar","key":"ref19"},{"journal-title":"Pre-training for robots: Offline rl enables learning new tasks from a handful of trials","year":"2022","author":"Kumar","key":"ref20"},{"journal-title":"Multi-game decision transformers","year":"2022","author":"Lee","key":"ref21"},{"journal-title":"Branch-train-merge: Embarrassingly parallel training of expert language models","year":"2022","author":"Li","key":"ref22"},{"journal-title":"Pre-trained transformers as universal computation engines","year":"2021","author":"Lu","key":"ref23"},{"journal-title":"Merging models with fisher-weighted averaging","year":"2021","author":"Matena","key":"ref24"},{"journal-title":"Communication-efficient learning of deep networks from decentralized data","year":"2016","author":"McMahan","key":"ref25"},{"journal-title":"Pointer sentinel mixture models","year":"2016","author":"Merity","key":"ref26"},{"journal-title":"Re-basin via implicit sinkhorn differentiation","year":"2022","author":"Guerrero Pe\u00f1a","key":"ref27"},{"journal-title":"Formal algorithms for transformers","year":"2022","author":"Phuong","key":"ref28"},{"journal-title":"Improving language understanding by generative pre-training","year":"2018","author":"Radford","key":"ref29"},{"journal-title":"A generalist agent","year":"2022","author":"Reed","key":"ref30"},{"journal-title":"Can wikipedia help offline reinforcement learning?","year":"2022","author":"Reid","key":"ref31"},{"article-title":"Investigating multi-task pretraining and generalization in reinforcement learning","volume-title":"The Eleventh International Conference on Learning Representations","author":"Taiga","key":"ref32"},{"journal-title":"On the effect of pre-training for transformer in different modality on offline reinforcement learning","year":"2022","author":"Takagi","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"journal-title":"Attention is all you need","year":"2017","author":"Vaswani","key":"ref35"},{"journal-title":"Model soups: averaging weights of multiple fine-tuned models improves accuracy without increasing inference time","year":"2022","author":"Wortsman","key":"ref36"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52688.2022.00780"},{"journal-title":"On the feasibility of cross-task transfer with model-based reinforcement learning","year":"2022","author":"Xu","key":"ref38"}],"event":{"name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2024,5,13]]},"location":"Yokohama, Japan","end":{"date-parts":[[2024,5,17]]}},"container-title":["2024 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10609961\/10609862\/10610919.pdf?arnumber=10610919","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T05:59:13Z","timestamp":1723269553000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10610919\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icra57147.2024.10610919","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]}}}