{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:38:33Z","timestamp":1740101913952,"version":"3.37.3"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100015599","name":"Toyota Research Institute","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100015599","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160888","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T17:20:56Z","timestamp":1688491256000},"page":"9515-9522","source":"Crossref","is-referenced-by-count":4,"title":["Visuomotor Control in Multi-Object Scenes Using Object-Aware Representations"],"prefix":"10.1109","author":[{"given":"Negin","family":"Heravi","sequence":"first","affiliation":[{"name":"Stanford University"}]},{"given":"Ayzaan","family":"Wahid","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Corey","family":"Lynch","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Pete","family":"Florence","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Travis","family":"Armstrong","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Jonathan","family":"Tompson","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Pierre","family":"Sermanet","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]},{"given":"Jeannette","family":"Bohg","sequence":"additional","affiliation":[{"name":"Stanford University"}]},{"given":"Debidatta","family":"Dwibedi","sequence":"additional","affiliation":[{"name":"Robotics at Google"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1109\/IROS.2018.8593951"},{"key":"ref12","article-title":"Self-supervised co-training for video representation learning","author":"han","year":"2020","journal-title":"NeurIPS"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.1109\/TRO.2019.2959445"},{"key":"ref14","article-title":"Grasp2vec: Learning object representations from self-supervised grasping","author":"jang","year":"2018","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1109\/TPAMI.2012.261"},{"year":"2022","author":"lynch","journal-title":"Interactive language Talking to robots in real time","key":"ref30"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/ICCV.2015.320"},{"key":"ref10","first-page":"557","article-title":"What matters in un-supervised optical flow","author":"jonschkowski","year":"2020","journal-title":"Computer Vision-ECCV 2020 16th European Conference"},{"key":"ref32","article-title":"Conditional Object-Centric Learning fromVideo","author":"kipf","year":"2022","journal-title":"International Conference on Learning Representations (ICLR)"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1109\/ICRA.2016.7487173"},{"year":"2018","author":"sermanet","journal-title":"Time-contrastive networks Self-supervised learning from video","key":"ref1"},{"key":"ref17","article-title":"Xirl: Cross-embodiment inverse reinforcement learning","author":"zakka","year":"2021","journal-title":"Conference on Robot Learning (CoRL)"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/ICRA40945.2020.9196714"},{"doi-asserted-by":"publisher","key":"ref19","DOI":"10.1109\/ICRA.2019.8794224"},{"doi-asserted-by":"publisher","key":"ref18","DOI":"10.1109\/ICRA.2018.8461196"},{"key":"ref24","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref23","article-title":"Implicit behavioral cloning","author":"florence","year":"2021","journal-title":"Conference on Robot Learning (CoRL)"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.23915\/distill.00003"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1109\/ICCV48922.2021.00709"},{"key":"ref20","article-title":"kpam: Keypoint affordances for category-level robotic manipulation","author":"manuelli","year":"2019","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref22","DOI":"10.1109\/IROS51168.2021.9636711"},{"key":"ref21","article-title":"Dense object nets: Learning dense visual object descriptors by and for robotic manipulation","author":"florence","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref28","first-page":"37","article-title":"Autoencoders, unsupervised learning, and deep architectures","volume":"27","author":"baldi","year":"2012","journal-title":"Proceedings of ICML Workshop on Unsupervised and Transfer Learning ser Proceedings of Machine Learning Research"},{"key":"ref27","article-title":"Improved baselines with momentum contrastive learning","author":"chen","year":"2020","journal-title":"ArXiv Preprint"},{"year":"2016","author":"coumans","journal-title":"Pybullet a python module for physics simulation for games robotics and machine learning","key":"ref29"},{"key":"ref8","article-title":"Momentum contrast for unsupervised visual representation learning","author":"he","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref7","article-title":"Object-centric learning with slot attention","author":"locatello","year":"2020","journal-title":"ser NIPS'20 Red Hook"},{"key":"ref9","article-title":"A simple framework for contrastive learning of visual representations","author":"chen","year":"2020","journal-title":"ArXiv Preprint"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1109\/LRA.2019.2956365"},{"key":"ref3","article-title":"Unsupervised learning of object keypoints for perception and control","author":"kulkarni","year":"2019","journal-title":"ArXiv Preprint"},{"year":"2021","author":"yuan","journal-title":"SORNet Spatial object-centric representations for sequential manipulation","key":"ref6"},{"key":"ref5","article-title":"Key-points into the future: Self-supervised correspondence in model-based reinforcement learning","author":"manuelli","year":"2020","journal-title":"ArXiv Preprint"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","start":{"date-parts":[[2023,5,29]]},"location":"London, United Kingdom","end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160888.pdf?arnumber=10160888","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T17:33:19Z","timestamp":1690219999000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160888\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160888","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}