{"id":"https://openalex.org/W4403573584","doi":"https://doi.org/10.48550/arxiv.2410.11201","title":"Tree of Attributes Prompt Learning for Vision-Language Models","display_name":"Tree of Attributes Prompt Learning for Vision-Language Models","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4403573584","doi":"https://doi.org/10.48550/arxiv.2410.11201"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.11201","pdf_url":"http://arxiv.org/pdf/2410.11201","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2410.11201","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085416386","display_name":"Tong Ding","orcid":"https://orcid.org/0000-0002-9042-4979"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Tong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082419115","display_name":"Wanhua Li","orcid":"https://orcid.org/0000-0002-2730-0543"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wanhua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069470446","display_name":"Zhongqi Miao","orcid":"https://orcid.org/0000-0002-0439-8592"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miao, Zhongqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5043151044","display_name":"Hanspeter Pfister","orcid":"https://orcid.org/0000-0002-3620-2582"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pfister, Hanspeter","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9964,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9964,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9219,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9153,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.6193748}],"concepts":[{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.6193748},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.50325936},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45800328},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38035518},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22069436},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.062264204}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.11201","pdf_url":"http://arxiv.org/pdf/2410.11201","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.11201","pdf_url":"http://arxiv.org/pdf/2410.11201","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W3204019825","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Prompt":[0,53],"learning":[1,54],"has":[2],"proven":[3],"effective":[4],"in":[5,39,153],"adapting":[6],"vision":[7,83,121],"language":[8],"models":[9],"for":[10,74],"downstream":[11],"tasks.":[12],"However,":[13],"existing":[14,89],"methods":[15,90,183],"usually":[16],"append":[17],"learnable":[18],"prompt":[19,86],"tokens":[20],"solely":[21],"with":[22,66,82,96,110],"the":[23,35,40,49,80,127,137,145,154,185],"category":[24,41,94],"names":[25,95,112,147],"to":[26,32,60,124,169],"obtain":[27],"textual":[28],"features,":[29],"which":[30,56],"fails":[31],"fully":[33],"leverage":[34],"rich":[36],"context":[37],"indicated":[38],"name.":[42],"To":[43,158],"address":[44,159],"this":[45,160],"issue,":[46],"we":[47,162],"propose":[48],"Tree":[50],"of":[51,64,99],"Attributes":[52],"(TAP),":[55],"first":[57],"instructs":[58],"LLMs":[59],"generate":[61],"a":[62,67,97,165],"tree":[63],"attributes":[65],"\"concept":[68],"-":[69,71],"attribute":[70],"description\"":[72],"structure":[73],"each":[75],"category,":[76],"and":[77,84,120,139],"then":[78],"learn":[79,126],"hierarchy":[81],"text":[85,119,172],"tokens.":[87],"Unlike":[88],"that":[91,178],"merely":[92],"augment":[93],"set":[98],"unstructured":[100],"descriptions,":[101],"our":[102,116,179],"approach":[103,117,180],"essentially":[104],"distills":[105],"structured":[106],"knowledge":[107],"graphs":[108],"associated":[109],"class":[111,146],"from":[113],"LLMs.":[114],"Furthermore,":[115],"introduces":[118],"prompts":[122],"designed":[123],"explicitly":[125],"corresponding":[128],"visual":[129],"attributes,":[130],"effectively":[131],"serving":[132],"as":[133,191,193],"domain":[134],"experts.":[135],"Additionally,":[136],"general":[138],"diverse":[140,198],"descriptions":[141],"generated":[142],"based":[143],"on":[144,184],"may":[148],"be":[149],"wrong":[150],"or":[151],"absent":[152],"specific":[155],"given":[156],"images.":[157],"misalignment,":[161],"further":[163],"introduce":[164],"vision-conditional":[166],"pooling":[167],"module":[168],"extract":[170],"instance-specific":[171],"features.":[173],"Extensive":[174],"experimental":[175],"results":[176],"demonstrate":[177],"outperforms":[181],"state-of-the-art":[182],"zero-shot":[186],"base-to-novel":[187],"generalization,":[188],"cross-dataset":[189],"transfer,":[190],"well":[192],"few-shot":[194],"classification":[195],"across":[196],"11":[197],"datasets.":[199]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403573584","counts_by_year":[],"updated_date":"2025-04-19T23:04:27.978416","created_date":"2024-10-20"}