{"id":"https://openalex.org/W4386875095","doi":"https://doi.org/10.48550/arxiv.2309.08912","title":"Delving into Multimodal Prompting for Fine-grained Visual Classification","display_name":"Delving into Multimodal Prompting for Fine-grained Visual Classification","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4386875095","doi":"https://doi.org/10.48550/arxiv.2309.08912"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2309.08912","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2309.08912","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020416270","display_name":"Jiang Xin","orcid":"https://orcid.org/0000-0003-3727-1288"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100662197","display_name":"Hao Tang","orcid":"https://orcid.org/0000-0003-2202-0515"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023088081","display_name":"Junyao Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Junyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102499994","display_name":"Xiaoyu Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Xiaoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056103024","display_name":"Shengfeng He","orcid":"https://orcid.org/0000-0002-3802-4644"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Shengfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5017096005","display_name":"Zechao Li","orcid":"https://orcid.org/0000-0002-5341-5985"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zechao","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":65},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9952,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9929,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/subcategory","display_name":"Subcategory","score":0.57015604},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.42847228}],"concepts":[{"id":"https://openalex.org/C2780617661","wikidata":"https://www.wikidata.org/wiki/Q541563","display_name":"Subcategory","level":2,"score":0.57015604},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5507237},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4676074},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.46478817},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.44723913},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4314174},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.42847228},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.34865186},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3428392},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.29032767},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2309.08912","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2309.08912","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2309.08912","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4298856889","https://openalex.org/W4226068787","https://openalex.org/W3157916708","https://openalex.org/W2963366818","https://openalex.org/W2178926482","https://openalex.org/W2168847017","https://openalex.org/W2148211421","https://openalex.org/W2009701419","https://openalex.org/W1547238018","https://openalex.org/W1514356479"],"abstract_inverted_index":{"Fine-grained":[0],"visual":[1,31],"classification":[2],"(FGVC)":[3],"involves":[4],"categorizing":[5],"fine":[6],"subdivisions":[7],"within":[8],"a":[9,78,97,102,142,151,164],"broader":[10],"category,":[11],"which":[12,118],"poses":[13],"challenges":[14],"due":[15],"to":[16,54,64,72,170],"subtle":[17],"inter-class":[18],"discrepancies":[19,123],"and":[20,76,101,113,130,137,177],"large":[21],"intra-class":[22],"variations.":[23],"However,":[24],"prevailing":[25],"approaches":[26],"primarily":[27],"focus":[28],"on":[29,87,159,186],"uni-modal":[30],"concepts.":[32],"Recent":[33],"advancements":[34],"in":[35,43,141],"pre-trained":[36,174],"vision-language":[37],"models":[38,53],"have":[39],"demonstrated":[40],"remarkable":[41],"performance":[42],"various":[44],"high-level":[45],"vision":[46,129,136],"tasks,":[47],"yet":[48],"the":[49,67,88,121,125,135,173,191],"applicability":[50],"of":[51,69,127,193],"such":[52],"FGVC":[55,74,188],"tasks":[56,75],"remains":[57],"uncertain.":[58],"In":[59],"this":[60],"paper,":[61],"we":[62,162],"aim":[63],"fully":[65,171],"exploit":[66],"capabilities":[68],"cross-modal":[70,147],"description":[71],"tackle":[73],"propose":[77],"novel":[79],"multimodal":[80,98,103],"prompting":[81,139],"solution,":[82],"denoted":[83],"as":[84],"MP-FGVC,":[85],"based":[86],"contrastive":[89],"language-image":[90],"pertaining":[91],"(CLIP)":[92],"model.":[93],"Our":[94],"MP-FGVC":[95,169],"comprises":[96],"prompts":[99],"scheme":[100],"adaptation":[104,180],"scheme.":[105],"The":[106,132],"former":[107],"includes":[108],"Subcategory-specific":[109],"Vision":[110],"Prompt":[111,116],"(SsVP)":[112],"Discrepancy-aware":[114],"Text":[115],"(DaTP),":[117],"explicitly":[119],"highlights":[120],"subcategory-specific":[122],"from":[124],"perspectives":[126],"both":[128],"language.":[131],"latter":[133],"aligns":[134],"text":[138],"elements":[140],"common":[143],"semantic":[144],"space,":[145],"facilitating":[146],"collaborative":[148],"reasoning":[149],"through":[150],"Vision-Language":[152],"Fusion":[153],"Module":[154],"(VLFM)":[155],"for":[156,168,181],"further":[157],"improvement":[158],"FGVC.":[160,182],"Moreover,":[161],"tailor":[163],"two-stage":[165],"optimization":[166],"strategy":[167],"leverage":[172],"CLIP":[175],"model":[176],"expedite":[178],"efficient":[179],"Extensive":[183],"experiments":[184],"conducted":[185],"four":[187],"datasets":[189],"demonstrate":[190],"effectiveness":[192],"our":[194],"MP-FGVC.":[195]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4386875095","counts_by_year":[],"updated_date":"2025-04-14T06:31:31.472624","created_date":"2023-09-20"}