{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,9]],"date-time":"2024-09-09T10:34:54Z","timestamp":1725878094382},"reference-count":80,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2021YFB3900504"],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Geosci. Remote Sensing"],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/tgrs.2023.3332219","type":"journal-article","created":{"date-parts":[[2023,11,13]],"date-time":"2023-11-13T19:17:58Z","timestamp":1699903078000},"page":"1-16","source":"Crossref","is-referenced-by-count":11,"title":["RingMo-SAM: A Foundation Model for Segment Anything in Multimodal Remote-Sensing Images"],"prefix":"10.1109","volume":"61","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-4264-6868","authenticated-orcid":false,"given":"Zhiyuan","family":"Yan","sequence":"first","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-9428-9751","authenticated-orcid":false,"given":"Junxi","family":"Li","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-0177-7001","authenticated-orcid":false,"given":"Xuexue","family":"Li","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}]},{"given":"Ruixue","family":"Zhou","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-8903-2708","authenticated-orcid":false,"given":"Wenkai","family":"Zhang","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-4017-8885","authenticated-orcid":false,"given":"Yingchao","family":"Feng","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-3931-3974","authenticated-orcid":false,"given":"Wenhui","family":"Diao","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-8647-9627","authenticated-orcid":false,"given":"Kun","family":"Fu","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-0038-9816","authenticated-orcid":false,"given":"Xian","family":"Sun","sequence":"additional","affiliation":[{"name":"Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"},{"key":"ref2","article-title":"Semantic image segmentation with deep convolutional nets and fully connected CRFs","author":"Chen","year":"2014","journal-title":"arXiv:1412.7062"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00069"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"ref5","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"25","author":"Krizhevsky"},{"key":"ref6","volume-title":"GPT-4","year":"2023"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.00110"},{"key":"ref9","article-title":"Segment everything everywhere all at once","author":"Zou","year":"2023","journal-title":"arXiv:2304.06718"},{"key":"ref10","first-page":"28","article-title":"iSAID: A large-scale dataset for instance segmentation in aerial images","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit.","author":"Zamir"},{"key":"ref11","volume-title":"ISPRS 2D Semantic Labeling Contest Vaihingen","year":"2020"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.jag.2021.102638"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2022.3170326"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_00990"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3663-1"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3599-y"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3317016"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3290411"},{"key":"ref19","article-title":"Rethinking atrous convolution for semantic image segmentation","author":"Chen","year":"2017","journal-title":"arXiv:1706.05587"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1802.02611"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2016.2554563"},{"key":"ref22","article-title":"Multimodal contrastive learning for remote sensing tasks","author":"Jain","year":"2022","journal-title":"arXiv:2209.02329"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3610-5"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-022-3588-0"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2016.90"},{"key":"ref26","article-title":"Indoor semantic segmentation using depth information","author":"Couprie","year":"2013","journal-title":"arXiv:1301.3572"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2017.11.011"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.rse.2019.04.014"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525087"},{"key":"ref30","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neur. Inf. Process. Sys.","volume":"33","author":"Brown"},{"key":"ref31","volume-title":"Introducing Chatgpt","year":"2023"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"ref33","article-title":"Scaling vision transformers to 22 billion parameters","author":"Dehghani","year":"2023","journal-title":"arXiv:2302.05442"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01398"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3194732"},{"key":"ref38","article-title":"Open-vocabulary semantic segmentation with mask-adapted CLIP","author":"Liang","year":"2022","journal-title":"arXiv:2210.04150"},{"key":"ref39","article-title":"Grounding DINO: Marrying DINO with grounded pre-training for open-set object detection","author":"Liu","year":"2023","journal-title":"arXiv:2303.05499"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"ref41","article-title":"SAM fails to segment anything?\u2014SAM-adapter: Adapting SAM in underperformed scenes: Camouflage, shadow, medical image segmentation, and more","author":"Chen","year":"2023","journal-title":"arXiv:2304.09148"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"ref43","article-title":"Prefix-tuning: Optimizing continuous prompts for generation","author":"Lisa Li","year":"2021","journal-title":"arXiv:2101.00190"},{"key":"ref44","first-page":"23716","article-title":"Flamingo: A visual language model for few-shot learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Alayrac"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_7"},{"key":"ref47","article-title":"CPT: Colorful prompt tuning for pre-trained vision-language models","author":"Yao","year":"2021","journal-title":"arXiv:2109.11797"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/tnnls.2023.3327962"},{"key":"ref49","article-title":"How can we know what language models know?","author":"Jiang","year":"2019","journal-title":"arXiv:1911.12543"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"ref51","article-title":"Exploring visual prompts for adapting large-scale models","author":"Bahng","year":"2022","journal-title":"arXiv:2203.17274"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00682"},{"key":"ref53","article-title":"RSPrompter: Learning to prompt for remote sensing instance segmentation based on visual foundation model","author":"Chen","year":"2023","journal-title":"arXiv:2306.16269"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01369"},{"key":"ref55","article-title":"A simple baseline for open-vocabulary semantic segmentation with pre-trained vision-language model","author":"Xu","year":"2021","journal-title":"arXiv:2112. 14757"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3130174"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2023.04.009"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2018.2873417"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3106941"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00928"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00326"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00584"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2022.3176603"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2020.3032672"},{"key":"ref69","first-page":"15908","article-title":"Transformer in transformer","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Han"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2102.04306"},{"key":"ref71","article-title":"SAN: Scale-aware network for semantic segmentation of high-resolution aerial images","author":"Lin","year":"2019","journal-title":"arXiv:1907.03089"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01625"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/JSTARS.2021.3119654"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2020.2976658"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2021.3063799"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1909.11065"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref79","article-title":"LoveDA: A remote sensing land-cover dataset for domain adaptive semantic segmentation","volume-title":"Proc. Neural Inf. Process. Syst. Track Datasets Benchmarks","volume":"1","author":"Wang"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1016\/j.isprsjprs.2021.12.004"}],"container-title":["IEEE Transactions on Geoscience and Remote Sensing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/36\/10006360\/10315957.pdf?arnumber=10315957","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T18:07:08Z","timestamp":1709402828000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10315957\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":80,"URL":"https:\/\/doi.org\/10.1109\/tgrs.2023.3332219","relation":{},"ISSN":["0196-2892","1558-0644"],"issn-type":[{"value":"0196-2892","type":"print"},{"value":"1558-0644","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]}}}