{"id":"https://openalex.org/W3206012133","doi":"https://doi.org/10.1145/3474085.3475483","title":"Cross-modal Joint Prediction and Alignment for Composed Query Image Retrieval","display_name":"Cross-modal Joint Prediction and Alignment for Composed Query Image Retrieval","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3206012133","doi":"https://doi.org/10.1145/3474085.3475483","mag":"3206012133"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475483","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101455740","display_name":"Yuchen Yang","orcid":"https://orcid.org/0009-0000-3805-2293"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchen Yang","raw_affiliation_strings":["University of Science and Technology of China, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100340979","display_name":"Min Wang","orcid":"https://orcid.org/0000-0003-3048-6980"},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Wang","raw_affiliation_strings":["Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Anhui, China"],"affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Anhui, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046805800","display_name":"Wengang Zhou","orcid":"https://orcid.org/0000-0003-1690-9836"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wengang Zhou","raw_affiliation_strings":["University of Science and Technology of China & Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China & Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078141810","display_name":"Houqiang Li","orcid":"https://orcid.org/0000-0003-2188-3028"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Houqiang Li","raw_affiliation_strings":["University of Science and Technology of China & Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Anhui, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China & Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Anhui, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.905,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.748913,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":92},"biblio":{"volume":null,"issue":null,"first_page":"3303","last_page":"3311"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Visual Question Answering in Images and Videos","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Visual Question Answering in Images and Videos","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Image Feature Retrieval and Recognition Techniques","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Advances in Transfer Learning and Domain Adaptation","score":0.9935,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cross-modal-retrieval","display_name":"Cross-Modal Retrieval","score":0.603852},{"id":"https://openalex.org/keywords/feature-matching","display_name":"Feature Matching","score":0.592572},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image Retrieval","score":0.592109},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.550146},{"id":"https://openalex.org/keywords/image-captioning","display_name":"Image Captioning","score":0.547104},{"id":"https://openalex.org/keywords/visual-question-answering","display_name":"Visual Question Answering","score":0.515115}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8058462},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.6562598},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5945317},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.57715654},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.550146},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.53662},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.52176744},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.423628},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38863754},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37620342},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3556914},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475483","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.57,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"},{"score":0.42,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, justice, and strong institutions"}],"grants":[{"funder":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China","award_id":"61822208 and 62021001"}],"datasets":[],"versions":[],"referenced_works_count":50,"referenced_works":["https://openalex.org/W1895577753","https://openalex.org/W1933349210","https://openalex.org/W2402144811","https://openalex.org/W2471768434","https://openalex.org/W2519887557","https://openalex.org/W2544587078","https://openalex.org/W2598634450","https://openalex.org/W2605102252","https://openalex.org/W2620629206","https://openalex.org/W2626778328","https://openalex.org/W2745461083","https://openalex.org/W2765440071","https://openalex.org/W2900464008","https://openalex.org/W2902101981","https://openalex.org/W2905544595","https://openalex.org/W2909671697","https://openalex.org/W2912057614","https://openalex.org/W2939052861","https://openalex.org/W2949845221","https://openalex.org/W2950761309","https://openalex.org/W2950855501","https://openalex.org/W2953106684","https://openalex.org/W2953384591","https://openalex.org/W2963588253","https://openalex.org/W2964211610","https://openalex.org/W2971117179","https://openalex.org/W2973665503","https://openalex.org/W2979382951","https://openalex.org/W2982376094","https://openalex.org/W2982905682","https://openalex.org/W2988281744","https://openalex.org/W2990519439","https://openalex.org/W3002377091","https://openalex.org/W3007938835","https://openalex.org/W3010363512","https://openalex.org/W3014490631","https://openalex.org/W3026458074","https://openalex.org/W3034585290","https://openalex.org/W3035356601","https://openalex.org/W3035454331","https://openalex.org/W3035552787","https://openalex.org/W3092760908","https://openalex.org/W3092962760","https://openalex.org/W3093199520","https://openalex.org/W3093291618","https://openalex.org/W3094950914","https://openalex.org/W3100623370","https://openalex.org/W3103651098","https://openalex.org/W3109923227","https://openalex.org/W4234552385"],"related_works":["https://openalex.org/W3194422352","https://openalex.org/W3154990682","https://openalex.org/W2785900585","https://openalex.org/W2770593030","https://openalex.org/W2560201613","https://openalex.org/W2560191017","https://openalex.org/W2402761219","https://openalex.org/W2348892528","https://openalex.org/W2014728371","https://openalex.org/W2012531322"],"abstract_inverted_index":{"In":[0,88],"this":[1,71,150],"paper,":[2],"we":[3,73],"focus":[4,45],"on":[5,46,185],"the":[6,14,48,55,59,62,66,91,101,105,113,116,126,131,139,143,165,168,174,190,206,212],"composed":[7,22,84],"query":[8,33,60,85,102,117,140],"image":[9,34,64,86,103,118,141],"retrieval":[10],"task,":[11],"namely":[12],"retrieving":[13],"target":[15,63,106,127,144],"images":[16],"that":[17,197],"are":[18],"similar":[19,124],"to":[20,35,125,138,148,171],"a":[21,26,32,37,75,153],"query,":[23],"in":[24],"which":[25],"modification":[27,67,92,120,132],"text":[28,93,121,133],"is":[29,94],"combined":[30],"with":[31,205],"describe":[36],"user's":[38],"accurate":[39],"search":[40],"intention.":[41],"Previous":[42],"methods":[43,170,208],"usually":[44],"learning":[47],"joint":[49,78],"image-text":[50],"representations,":[51],"but":[52,129],"rarely":[53],"consider":[54],"intrinsic":[56],"relationship":[57,151],"among":[58],"image,":[61,128],"and":[65,80,104,119,142,176,180],"text.":[68],"To":[69],"address":[70],"problem,":[72],"propose":[74],"new":[76],"cross-modal":[77],"prediction":[79],"alignment":[81],"framework":[82,161],"for":[83],"retrieval.":[87],"our":[89,193,198],"framework,":[90,195],"regarded":[95],"as":[96],"an":[97],"implicit":[98],"transformation":[99],"between":[100],"image.":[107,145],"Motivated":[108],"by":[109,152],"that,":[110],"not":[111],"only":[112],"combination":[114],"of":[115,178,192],"should":[122,134],"be":[123,135,202],"also":[130],"predicted":[136],"according":[137],"We":[146],"devote":[147],"aligning":[149],"novel":[154],"Joint":[155],"Prediction":[156],"Module":[157],"(JPM).":[158],"Our":[159],"proposed":[160,194,199],"can":[162,201],"seamlessly":[163],"incorporate":[164],"JPM":[166,200],"into":[167],"existing":[169,207],"effectively":[172,210],"improve":[173],"discrimination":[175],"robustness":[177],"visual":[179],"textual":[181],"representations.":[182],"The":[183],"experiments":[184],"three":[186],"public":[187],"datasets":[188],"demonstrate":[189],"effectiveness":[191],"proving":[196],"simply":[203],"incorporated":[204],"while":[209],"improving":[211],"performance.":[213]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3206012133","counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":4}],"updated_date":"2024-11-27T09:42:22.071143","created_date":"2021-10-25"}