{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:57:00Z","timestamp":1740103020111,"version":"3.37.3"},"publisher-location":"New York, NY, USA","reference-count":11,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657987","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T16:40:05Z","timestamp":1720716005000},"page":"3047-3050","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Multimodal Representation and Retrieval [MRR 2024]"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4544-2078","authenticated-orcid":false,"given":"Xinliang","family":"Zhu","sequence":"first","affiliation":[{"name":"Amazon, Palo Alto, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8233-4301","authenticated-orcid":false,"given":"Arnab","family":"Dhua","sequence":"additional","affiliation":[{"name":"Amazon, Palo Alto, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3509-582X","authenticated-orcid":false,"given":"Douglas","family":"Gray","sequence":"additional","affiliation":[{"name":"Amazon, Palo Alto, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6582-3863","authenticated-orcid":false,"given":"I. Zeki","family":"Yalniz","sequence":"additional","affiliation":[{"name":"Meta, Menlo Park, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5729-0945","authenticated-orcid":false,"given":"Tan","family":"Yu","sequence":"additional","affiliation":[{"name":"Nvidia, Santa Clara, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9659-1551","authenticated-orcid":false,"given":"Mohamed","family":"Elhoseiny","sequence":"additional","affiliation":[{"name":"King Abdullah University of Science and Technology, Thuwal, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7074-3219","authenticated-orcid":false,"given":"Bryan","family":"Plummer","sequence":"additional","affiliation":[{"name":"Boston University, Boston, MA, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Yahoo Finance. 2023. Amazon updates visual search AR search and more in challenge to Google. https:\/\/finance.yahoo.com\/news\/amazon-updates-visual-search-ar-141147559.html. Accessed: 2024-01-09."},{"key":"e_1_3_2_1_2_1","volume-title":"Boon: A Neural Search Engine for Cross-Modal Information Retrieval. arXiv preprint arXiv:2307.14240","author":"Gong Yan","year":"2023","unstructured":"Yan Gong and Georgina Cosma. 2023. Boon: A Neural Search Engine for Cross-Modal Information Retrieval. arXiv preprint arXiv:2307.14240 (2023)."},{"key":"e_1_3_2_1_3_1","unstructured":"Google. 2022. Go beyond the search box: Introducing multisearch. https:\/\/blog.google\/products\/search\/multisearch\/. Accessed: 2024-01-09."},{"key":"e_1_3_2_1_4_1","volume-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"International Conference on Machine Learning. PMLR, 12888--12900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International Conference on Machine Learning. PMLR, 12888--12900."},{"key":"e_1_3_2_1_6_1","volume-title":"Align before fuse: Vision and language representation learning with momentum distillation. Advances in neural information processing systems","author":"Li Junnan","year":"2021","unstructured":"Junnan Li, Ramprasaath Selvaraju, Akhilesh Gotmare, Shafiq Joty, Caiming Xiong, and Steven Chu Hong Hoi. 2021. Align before fuse: Vision and language representation learning with momentum distillation. Advances in neural information processing systems , Vol. 34 (2021), 9694--9705."},{"key":"e_1_3_2_1_7_1","volume-title":"Visual instruction tuning. arXiv preprint arXiv:2304.08485","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual instruction tuning. arXiv preprint arXiv:2304.08485 (2023)."},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"volume-title":"Workshop. 2023 a. 6th Multimodal Learning and Applications Workshop. https:\/\/mula-workshop.github.io\/. Accessed: 2024-01-09","author":"CVPR","key":"e_1_3_2_1_9_1","unstructured":"CVPR Workshop. 2023 a. 6th Multimodal Learning and Applications Workshop. https:\/\/mula-workshop.github.io\/. Accessed: 2024-01-09."},{"volume-title":"Workshop. 2023 b. 1st MMIR Deep Multimodal Learning for Information Retrieval. https:\/\/videorelation.nextcenter.org\/MMIR23\/. Accessed: 2024-01-09","author":"MM","key":"e_1_3_2_1_10_1","unstructured":"MM Workshop. 2023 b. 1st MMIR Deep Multimodal Learning for Information Retrieval. https:\/\/videorelation.nextcenter.org\/MMIR23\/. Accessed: 2024-01-09."},{"volume-title":"Workshop. 2023 c. eCom'23: The SIGIR 2023 Workshop on eCommerce. https:\/\/sigir.org\/sigir2023\/program\/workshops\/. Accessed: 2024-01-09","author":"SIGIR","key":"e_1_3_2_1_11_1","unstructured":"SIGIR Workshop. 2023 c. eCom'23: The SIGIR 2023 Workshop on eCommerce. https:\/\/sigir.org\/sigir2023\/program\/workshops\/. Accessed: 2024-01-09."}],"event":{"name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Washington DC USA","acronym":"SIGIR 2024"},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657987","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T05:44:28Z","timestamp":1729230268000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657987"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":11,"alternative-id":["10.1145\/3626772.3657987","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657987","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}