{"id":"https://openalex.org/W4310825079","doi":"https://doi.org/10.48550/arxiv.2212.01758","title":"Improving Zero-shot Generalization and Robustness of Multi-modal Models","display_name":"Improving Zero-shot Generalization and Robustness of Multi-modal Models","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4310825079","doi":"https://doi.org/10.48550/arxiv.2212.01758"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2212.01758","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2212.01758","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038696465","display_name":"Yunhao Ge","orcid":"https://orcid.org/0000-0002-8110-9280"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ge, Yunhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100720534","display_name":"Jie Ren","orcid":"https://orcid.org/0000-0001-9918-3000"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ren, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100721973","display_name":"Yuxiao Wang","orcid":"https://orcid.org/0000-0002-4162-4587"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuxiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031269302","display_name":"Andrew Gallagher","orcid":"https://orcid.org/0000-0001-8090-2239"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gallagher, Andrew","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418319","display_name":"Ming\u2013Hsuan Yang","orcid":"https://orcid.org/0000-0003-4848-2304"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Ming-Hsuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054494771","display_name":"Laurent Itti","orcid":"https://orcid.org/0000-0002-0168-2977"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Itti, Laurent","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030402556","display_name":"Hartwig Adam","orcid":"https://orcid.org/0000-0003-1258-4341"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adam, Hartwig","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012859570","display_name":"Balaji Lakshminarayanan","orcid":"https://orcid.org/0000-0002-3334-1659"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lakshminarayanan, Balaji","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101236333","display_name":"Jiaping Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Jiaping","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":59},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9989,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.982,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.46230853},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.44069633}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.75772905},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.57430124},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.51225084},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.49834347},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.462984},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.46230853},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4443731},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4415239},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.44069633},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17818189},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2212.01758","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2212.01758","pdf_url":"http://arxiv.org/pdf/2212.01758","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2212.01758","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2212.01758","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4390421286","https://openalex.org/W4389724018","https://openalex.org/W4360995913","https://openalex.org/W4318719684","https://openalex.org/W4318559728","https://openalex.org/W4312193868","https://openalex.org/W4280563792","https://openalex.org/W3183136280","https://openalex.org/W2775233965","https://openalex.org/W2140186469"],"abstract_inverted_index":{"Multi-modal":[0],"image-text":[1],"models":[2,31,176],"such":[3,134,222],"as":[4,223],"CLIP":[5,173],"and":[6,16,55,76,101,127,154,161,174,196,218,235],"LiT":[7,175],"have":[8],"demonstrated":[9],"impressive":[10],"performance":[11,53],"on":[12,118,133,171,192,198],"image":[13,102],"classification":[14],"benchmarks":[15],"their":[17],"zero-shot":[18,27,78],"generalization":[19],"ability":[20],"is":[21,87,228,246],"particularly":[22],"exciting.":[23],"While":[24],"the":[25,35,49,60,68,96,113,141,147,157,163,187,193,199],"top-5":[26],"accuracies":[28,37],"of":[29,59,95,140],"these":[30],"are":[32,38,63],"very":[33],"high,":[34],"top-1":[36,85,188],"much":[39],"lower":[40],"(over":[41],"25%":[42],"gap":[43,54],"in":[44,67],"some":[45],"cases).":[46],"We":[47,104,168,204],"investigate":[48],"reasons":[50],"for":[51],"this":[52],"find":[56],"that":[57,106,207],"many":[58],"failure":[61],"cases":[62],"caused":[64],"by":[65,92,137,150,190],"ambiguity":[66],"text":[69,166],"prompts.":[70,167],"First,":[71],"we":[72,123,145],"develop":[73],"a":[74,125],"simple":[75,126],"efficient":[77,128],"post-hoc":[79],"method":[80,185,209,227],"to":[81,89,130,240],"identify":[82],"images":[83,136],"whose":[84],"prediction":[86,120],"likely":[88],"be":[90,237],"incorrect,":[91],"measuring":[93],"consistency":[94],"predictions":[97],"w.r.t.":[98],"multiple":[99],"prompts":[100],"transformations.":[103],"show":[105,206],"our":[107,184,208],"procedure":[108],"better":[109],"predicts":[110],"mistakes,":[111],"outperforming":[112],"popular":[114],"max":[115],"logit":[116],"baseline":[117],"selective":[119],"tasks.":[121],"Next,":[122],"propose":[124],"way":[129],"improve":[131],"accuracy":[132,189],"uncertain":[135,194],"making":[138],"use":[139],"WordNet":[142],"hierarchy;":[143],"specifically":[144],"augment":[146],"original":[148],"class":[149],"incorporating":[151],"its":[152],"parent":[153],"children":[155],"from":[156],"semantic":[158],"label":[159],"hierarchy,":[160],"plug":[162],"augmentation":[164],"into":[165],"conduct":[169],"experiments":[170],"both":[172],"with":[177],"five":[178],"different":[179],"ImageNet-based":[180],"datasets.":[181],"For":[182],"CLIP,":[183],"improves":[186,210],"17.13%":[191],"subset":[195],"3.6%":[197],"entire":[200],"ImageNet":[201,212],"validation":[202],"set.":[203],"also":[205],"across":[211],"shifted":[213],"datasets,":[214,217],"four":[215],"other":[216,219,241],"model":[220,233],"architectures":[221],"LiT.":[224],"The":[225],"proposed":[226],"hyperparameter-free,":[229],"requires":[230],"no":[231],"additional":[232],"training":[234],"can":[236],"easily":[238],"scaled":[239],"large":[242],"multi-modal":[243],"architectures.":[244],"Code":[245],"available":[247],"at":[248],"https://github.com/gyhandy/Hierarchy-CLIP.":[249]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4310825079","counts_by_year":[],"updated_date":"2025-03-01T07:08:49.320309","created_date":"2022-12-18"}