{"id":"https://openalex.org/W4401857212","doi":"https://doi.org/10.1145/3637528.3671640","title":"Bringing Multimodality to Amazon Visual Search System","display_name":"Bringing Multimodality to Amazon Visual Search System","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401857212","doi":"https://doi.org/10.1145/3637528.3671640"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671640","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671640","source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671640","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102586120","display_name":"Xinliang Zhu","orcid":"https://orcid.org/0000-0002-4544-2078"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xinliang Zhu","raw_affiliation_strings":["Amazon.com, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Palo Alto, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088643171","display_name":"Sheng-Wei Huang","orcid":"https://orcid.org/0000-0002-0244-6335"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sheng-Wei Huang","raw_affiliation_strings":["Amazon.com, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Palo Alto, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103477831","display_name":"Han Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"funder","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Han Ding","raw_affiliation_strings":["Amazon.com, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049718088","display_name":"Jinyu Yang","orcid":"https://orcid.org/0000-0002-7004-3570"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinyu Yang","raw_affiliation_strings":["Amazon.com, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Palo Alto, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029648476","display_name":"Kelvin Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"funder","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kelvin Chen","raw_affiliation_strings":["Amazon.com, New York, New York, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, New York, New York, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113694554","display_name":"T. S. Zhou","orcid":null},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tao Zhou","raw_affiliation_strings":["Amazon.com, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Palo Alto, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048282838","display_name":"Tal Neiman","orcid":"https://orcid.org/0009-0005-0198-240X"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"funder","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tal Neiman","raw_affiliation_strings":["Amazon.com, New York, New York, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, New York, New York, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106708603","display_name":"Ouye Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"funder","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ouye Xie","raw_affiliation_strings":["Amazon.com, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Seattle, WA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113705297","display_name":"S.M. Tran","orcid":null},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Son Tran","raw_affiliation_strings":["Amazon.com, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Palo Alto, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073471997","display_name":"Benjamin Yao","orcid":"https://orcid.org/0009-0005-8622-3540"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"funder","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Benjamin Yao","raw_affiliation_strings":["Amazon.com, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Seattle, WA, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103324588","display_name":"Douglas Gray","orcid":"https://orcid.org/0009-0007-3509-582X"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Douglas Gray","raw_affiliation_strings":["Amazon.com, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Palo Alto, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108330927","display_name":"Anuj Bindal","orcid":null},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anuj Bindal","raw_affiliation_strings":["Amazon.com, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Palo Alto, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055026781","display_name":"Arnab Dhua","orcid":"https://orcid.org/0009-0007-8233-4301"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arnab Dhua","raw_affiliation_strings":["Amazon.com, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Palo Alto, CA, USA","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":78},"biblio":{"volume":null,"issue":null,"first_page":"6390","last_page":"6399"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9759,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multimodality","display_name":"Multimodality","score":0.863105}],"concepts":[{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.863105},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.63789123},{"id":"https://openalex.org/C535291247","wikidata":"https://www.wikidata.org/wiki/Q177567","display_name":"Amazon rainforest","level":2,"score":0.5552845},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36058402},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.337986},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2929334},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.06804934},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671640","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671640","source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.13364","pdf_url":"http://arxiv.org/pdf/2412.13364","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671640","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671640","source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":40,"referenced_works":["https://openalex.org/W2096733369","https://openalex.org/W2138621090","https://openalex.org/W2157364932","https://openalex.org/W2200092826","https://openalex.org/W2250384498","https://openalex.org/W2471768434","https://openalex.org/W2560662850","https://openalex.org/W2605102252","https://openalex.org/W2625758617","https://openalex.org/W2798825526","https://openalex.org/W2950628404","https://openalex.org/W2963037989","https://openalex.org/W2963150697","https://openalex.org/W2963290108","https://openalex.org/W2963775347","https://openalex.org/W2964271799","https://openalex.org/W2986277806","https://openalex.org/W3015557549","https://openalex.org/W3034202663","https://openalex.org/W3080750010","https://openalex.org/W3091588028","https://openalex.org/W3106778652","https://openalex.org/W3171007011","https://openalex.org/W3173220247","https://openalex.org/W3176641147","https://openalex.org/W3182707920","https://openalex.org/W3184735396","https://openalex.org/W3190434573","https://openalex.org/W3213454282","https://openalex.org/W3215626407","https://openalex.org/W4212836813","https://openalex.org/W4286906902","https://openalex.org/W4290927947","https://openalex.org/W4306820534","https://openalex.org/W4367365797","https://openalex.org/W4380715531","https://openalex.org/W4386065512","https://openalex.org/W4386071687","https://openalex.org/W4386076084","https://openalex.org/W4386076647"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W4388989680","https://openalex.org/W4302009627","https://openalex.org/W4212781403","https://openalex.org/W4205899436","https://openalex.org/W3111427900","https://openalex.org/W3022229171","https://openalex.org/W2901011898","https://openalex.org/W2748952813","https://openalex.org/W2462138927"],"abstract_inverted_index":{"Image":[0],"to":[1,51,61,84,146,150,153,173],"image":[2,30,42,99,152,154,168,196],"matching":[3,24,43,50,87,112,155,197],"has":[4],"been":[5],"well":[6],"studied":[7],"in":[8,65],"the":[9,28,85,93,101,128,151,186,202,210],"computer":[10],"vision":[11],"community.":[12],"Previous":[13],"studies":[14],"mainly":[15],"focus":[16],"on":[17,185,195],"training":[18],"a":[19,121,124,147],"deep":[20,77],"metric":[21,78],"learning":[22],"model":[23,102,161,204],"visual":[25,53,114],"patterns":[26],"between":[27,92],"query":[29,135],"and":[31,98,123,169,179,205],"gallery":[32],"images.":[33],"In":[34],"this":[35,57,143,160],"study,":[36],"we":[37,59,70,140,190],"show":[38,141,182],"that":[39,142],"pure":[40],"image-to-":[41],"suffers":[44],"from":[45,106,209],"false":[46],"positives":[47],"caused":[48],"by":[49],"local":[52],"patterns.":[54],"To":[55],"alleviate":[56],"issue,":[58],"propose":[60],"leverage":[62],"recent":[63],"advances":[64],"vision-language":[66],"pretraining":[67],"research.":[68],"Specifically,":[69,189],"introduce":[71],"additional":[72,90],"image-text":[73],"alignment":[74],"losses":[75],"into":[76],"learning,":[79],"which":[80,110,165],"serve":[81],"as":[82],"constraints":[83],"image-to-image":[86],"loss.":[88],"With":[89],"alignments":[91],"text":[94,134,171],"(e.g.,":[95],"product":[96],"title)":[97],"pairs,":[100],"can":[103],"learn":[104],"concepts":[105],"both":[107,167],"modalities":[108],"explicitly,":[109],"avoids":[111],"low-level":[113],"features.":[115],"We":[116,157],"progressively":[117],"develop":[118],"two":[119],"variants,":[120],"3-tower":[122,203],"4-tower":[125,211],"model,":[126],"where":[127],"latter":[129],"takes":[130,166],"one":[131],"more":[132],"short":[133],"input.":[136],"Through":[137],"extensive":[138],"experiments,":[139],"change":[144],"leads":[145],"substantial":[148],"improvement":[149,194,208],"problem.":[156],"further":[158,207],"leveraged":[159],"for":[162],"multimodal":[163],"search,":[164],"reformulation":[170],"queries":[172],"improve":[174],"search":[175],"quality.":[176],"Both":[177],"offline":[178],"online":[180],"experiments":[181],"strong":[183],"improvements":[184],"main":[187],"metrics.":[188],"see":[191],"4.95%":[192],"relative":[193],"click":[198],"through":[199],"rate":[200],"with":[201],"1.13%":[206],"model.":[212]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4401857212","counts_by_year":[],"updated_date":"2025-04-04T15:34:20.309956","created_date":"2024-08-25"}