{"id":"https://openalex.org/W4396909863","doi":"https://doi.org/10.1109/tgrs.2024.3401031","title":"Global-Local Information Soft-Alignment for Cross-Modal Remote-Sensing Image-Text Retrieval","display_name":"Global-Local Information Soft-Alignment for Cross-Modal Remote-Sensing Image-Text Retrieval","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4396909863","doi":"https://doi.org/10.1109/tgrs.2024.3401031"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tgrs.2024.3401031","pdf_url":null,"source":{"id":"https://openalex.org/S111326731","display_name":"IEEE Transactions on Geoscience and Remote Sensing","issn_l":"0196-2892","issn":["0196-2892","1558-0644"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101455287","display_name":"Gang Hu","orcid":"https://orcid.org/0000-0003-4916-3460"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"funder","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Hu","raw_affiliation_strings":["Key Laboratory of Information Fusion Technology of Ministry of Education, Northwestern Polytechnical University, Xi'an, Shaanxi Province, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Information Fusion Technology of Ministry of Education, Northwestern Polytechnical University, Xi'an, Shaanxi Province, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030436858","display_name":"Zaidao Wen","orcid":"https://orcid.org/0000-0003-1258-7737"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"funder","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zaidao Wen","raw_affiliation_strings":["Key Laboratory of Information Fusion Technology of Ministry of Education, Northwestern Polytechnical University, Xi'an, Shaanxi Province, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Information Fusion Technology of Ministry of Education, Northwestern Polytechnical University, Xi'an, Shaanxi Province, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007396281","display_name":"Yafei Lv","orcid":"https://orcid.org/0000-0002-2779-5099"},"institutions":[{"id":"https://openalex.org/I4402554106","display_name":"Naval Research Academy","ror":"https://ror.org/02t42bb94","country_code":null,"type":"education","lineage":["https://openalex.org/I4402554106"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Yafei Lv","raw_affiliation_strings":["Naval Research Institute of PLA, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Naval Research Institute of PLA, Beijing, China","institution_ids":["https://openalex.org/I4402554106"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101482691","display_name":"Jianting Zhang","orcid":"https://orcid.org/0000-0002-0161-9716"},"institutions":[{"id":"https://openalex.org/I4402554106","display_name":"Naval Research Academy","ror":"https://ror.org/02t42bb94","country_code":null,"type":"education","lineage":["https://openalex.org/I4402554106"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Jianting Zhang","raw_affiliation_strings":["Naval Research Institute of PLA, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Naval Research Institute of PLA, Beijing, China","institution_ids":["https://openalex.org/I4402554106"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060985978","display_name":"Qian Wu","orcid":"https://orcid.org/0000-0001-5746-2677"},"institutions":[{"id":"https://openalex.org/I4210104252","display_name":"Air Force Engineering University","ror":"https://ror.org/00seraz22","country_code":"CN","type":"funder","lineage":["https://openalex.org/I4210104252"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qian Wu","raw_affiliation_strings":["Institute of Information and Navigation, Air Force Engineering University, Xi'an, Shaanxi Province, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information and Navigation, Air Force Engineering University, Xi'an, Shaanxi Province, China","institution_ids":["https://openalex.org/I4210104252"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.141,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.77028,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":77,"max":88},"biblio":{"volume":"62","issue":null,"first_page":"1","last_page":"15"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9905,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9905,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9861,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.9391,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5164652},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.41947097}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8203759},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5556335},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.51918995},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5164652},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.47037926},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.46410167},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4595728},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.41947097},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.37905174},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3763849},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34230977},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32086954},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.10533297},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tgrs.2024.3401031","pdf_url":null,"source":{"id":"https://openalex.org/S111326731","display_name":"IEEE Transactions on Geoscience and Remote Sensing","issn_l":"0196-2892","issn":["0196-2892","1558-0644"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.69,"display_name":"Reduced inequalities"}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62206310"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"61806165"},{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"62176213"}],"datasets":[],"versions":[],"referenced_works_count":55,"referenced_works":["https://openalex.org/W1527575280","https://openalex.org/W1889081078","https://openalex.org/W2064675550","https://openalex.org/W2095483845","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2401246392","https://openalex.org/W2489434015","https://openalex.org/W2560730294","https://openalex.org/W2626107033","https://openalex.org/W2763822693","https://openalex.org/W2774267535","https://openalex.org/W2886702754","https://openalex.org/W2912371042","https://openalex.org/W2922388164","https://openalex.org/W2962784628","https://openalex.org/W2963530300","https://openalex.org/W2968124245","https://openalex.org/W2981537605","https://openalex.org/W2997786074","https://openalex.org/W3004137323","https://openalex.org/W3004827100","https://openalex.org/W3013436746","https://openalex.org/W3042481550","https://openalex.org/W3083147600","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3100245404","https://openalex.org/W3105475654","https://openalex.org/W3165084071","https://openalex.org/W3198377975","https://openalex.org/W3208803664","https://openalex.org/W4200633094","https://openalex.org/W4210894218","https://openalex.org/W4221145450","https://openalex.org/W4224911357","https://openalex.org/W4283721482","https://openalex.org/W4285595687","https://openalex.org/W4297808394","https://openalex.org/W4309396952","https://openalex.org/W4312275662","https://openalex.org/W4312626422","https://openalex.org/W4312651322","https://openalex.org/W4312784228","https://openalex.org/W4312913651","https://openalex.org/W4313055764","https://openalex.org/W4313121711","https://openalex.org/W4313260363","https://openalex.org/W4320855021","https://openalex.org/W4378696930","https://openalex.org/W4385245566","https://openalex.org/W4385899901","https://openalex.org/W4386212341","https://openalex.org/W4386790226","https://openalex.org/W4393158799"],"related_works":["https://openalex.org/W4389116644","https://openalex.org/W4388870064","https://openalex.org/W4235186151","https://openalex.org/W4205463238","https://openalex.org/W3103844505","https://openalex.org/W2761785940","https://openalex.org/W259157601","https://openalex.org/W2210139803","https://openalex.org/W2153315159","https://openalex.org/W1482209366"],"abstract_inverted_index":{"Cross-modal":[0],"remote-sensing":[1,14],"image-text":[2,146],"retrieval":[3,129,221],"(CMRSITR)":[4],"is":[5],"a":[6,31,110,117,134,197,224],"challenging":[7],"task":[8],"that":[9,168],"aims":[10],"to":[11,92,127],"retrieve":[12],"target":[13],"(RS)":[15],"images":[16,29,35,177],"based":[17,115],"on":[18,58,116],"textual":[19],"descriptions.":[20],"However,":[21,89],"the":[22,43,64,68,94,141,151,181,190],"modal":[23],"gap":[24],"between":[25],"texts":[26],"and":[27,39,48,70,78,82,99,148,178],"RS":[28,34,157,176],"poses":[30],"significant":[32],"challenge.":[33],"comprise":[36],"multiple":[37,154],"targets":[38,155],"complex":[40],"backgrounds,":[41],"necessitating":[42],"mining":[44],"of":[45,67,96,145,192],"both":[46,175],"global":[47,77,86,135,142],"local":[49,59,65,79,164,172,193,198],"information":[50,123,165,199],"for":[51,113],"effective":[52],"CMRSITR.":[53],"Existing":[54],"approaches":[55],"primarily":[56],"focus":[57],"image":[60,80,136],"features":[61,66,81,144],"while":[62],"disregarding":[63],"text":[69,87],"their":[71],"correspondence.":[72],"These":[73],"methods":[74,222],"typically":[75],"fuse":[76],"align":[83],"them":[84],"with":[85],"features.":[88],"they":[90],"struggle":[91],"eliminate":[93],"influence":[95],"cluttered":[97],"backgrounds":[98],"may":[100],"overlook":[101],"crucial":[102],"targets.":[103],"To":[104,185],"address":[105],"these":[106],"limitations,":[107],"we":[108,160,195],"propose":[109],"novel":[111],"framework":[112,132],"CMRSITR":[114,208],"transformer":[118],"architecture,":[119],"which":[120,139],"leverages":[121],"global-local":[122],"soft":[124],"alignment":[125,191],"(GLISA)":[126],"enhance":[128],"performance.":[130],"Our":[131],"incorporates":[133],"extraction":[137,166],"module,":[138],"captures":[140],"semantic":[143,187],"pairs":[147],"effectively":[149],"represents":[150],"relationships":[152],"among":[153],"in":[156],"images.":[158],"Additionally,":[159],"introduce":[161],"an":[162],"adaptive":[163],"module":[167],"adaptively":[169],"mines":[170],"discriminative":[171],"clues":[173],"from":[174],"texts,":[179],"aligning":[180],"corresponding":[182],"fine-grained":[183],"information.":[184],"mitigate":[186],"ambiguities":[188],"during":[189],"features,":[194],"design":[196],"soft-alignment":[200],"module.":[201],"In":[202],"comparative":[203],"evaluations":[204],"using":[205],"two":[206],"public":[207],"datasets,":[209],"our":[210],"proposed":[211],"method":[212],"achieves":[213],"state-of-the-art":[214],"results,":[215],"surpassing":[216],"not":[217],"only":[218],"traditional":[219],"cross-modal":[220],"by":[223],"substantial":[225],"margin,":[226],"but":[227],"also":[228],"other":[229],"CLIP-based":[230],"methods.":[231]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4396909863","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-15T07:51:20.843558","created_date":"2024-05-15"}