{"id":"https://openalex.org/W4389982500","doi":"https://doi.org/10.48550/arxiv.2312.10104","title":"ICD-LM: Configuring Vision-Language In-Context Demonstrations by Language Modeling","display_name":"ICD-LM: Configuring Vision-Language In-Context Demonstrations by Language Modeling","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4389982500","doi":"https://doi.org/10.48550/arxiv.2312.10104"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.10104","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2312.10104","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101282002","display_name":"Yingzhe Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng, Yingzhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014490044","display_name":"Yang Xu","orcid":"https://orcid.org/0000-0002-0958-8547"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019827095","display_name":"Haoxuan Ma","orcid":"https://orcid.org/0000-0002-4852-6007"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Haoxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060199267","display_name":"Shuo Xu","orcid":"https://orcid.org/0000-0002-8602-1819"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Shuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103281435","display_name":"Chi Zhang","orcid":"https://orcid.org/0000-0003-0713-3722"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Chi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100738290","display_name":"Yucheng Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Yucheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5042324027","display_name":"Hanwang Zhang","orcid":"https://orcid.org/0000-0001-7374-8739"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hanwang","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.762268,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":78,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9855,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9791,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.67993504},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5131617},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4331142}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8295181},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.7117286},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.67993504},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.66380155},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6511398},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.61856914},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5751276},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5131617},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5118684},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43868938},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4331142},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.107126296},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10361308},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.08878869},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.07718256},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.10104","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2312.10104","pdf_url":"http://arxiv.org/pdf/2312.10104","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2312.10104","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2312.10104","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.72,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4388893791","https://openalex.org/W4290852288","https://openalex.org/W4283207562","https://openalex.org/W4210416330","https://openalex.org/W3217195652","https://openalex.org/W3088136942","https://openalex.org/W2963177403","https://openalex.org/W2949362007","https://openalex.org/W2944691285","https://openalex.org/W2775506363"],"abstract_inverted_index":{"This":[0,80],"paper":[1],"studies":[2,192],"how":[3,118],"to":[4,17,75,96,115,119,142],"configure":[5],"powerful":[6],"In-Context":[7,22],"Demonstration":[8],"(ICD)":[9],"sequences":[10,88],"for":[11,89,139,186],"a":[12,33,38,43,52,83,152,183],"Large":[13],"Vision-Language":[14,19],"Model":[15,71,185],"(LVLM)":[16],"solve":[18],"tasks":[20],"through":[21,169],"Learning":[23],"(ICL).":[24],"After":[25],"observing":[26],"that":[27,108],"configuring":[28],"an":[29,55,68],"ICD":[30,56,69,78,87,148,187],"sequence":[31,57],"is":[32,162,210],"mirror":[34],"process":[35],"of":[36,85,127,146,181,197],"composing":[37],"sentence,":[39],"i.e.,":[40],"just":[41],"as":[42],"sentence":[44],"can":[45,58],"be":[46,60],"composed":[47],"word":[48,50],"by":[49,63,159],"via":[51],"Language":[53,70,184],"Model,":[54],"also":[59,163],"configured":[61],"one":[62],"one.":[64],"Consequently,":[65],"we":[66,134],"introduce":[67],"(ICD-LM)":[72],"specifically":[73],"designed":[74],"generate":[76],"effective":[77],"sequences.":[79,129],"involves":[81],"creating":[82],"dataset":[84,154,161,199],"hand-crafted":[86],"various":[90,198],"query":[91],"samples":[92],"and":[93,110,121,155,175,201],"using":[94,182],"it":[95],"train":[97],"the":[98,125,128,136,144,156,179,195,206],"ICD-LM.":[99],"Our":[100,189],"approach,":[101],"diverging":[102],"from":[103],"traditional":[104],"methods":[105],"in":[106,151,171,212],"NLP":[107],"select":[109,120],"order":[111,122],"ICDs":[112],"separately,":[113],"enables":[114],"simultaneously":[116],"learn":[117],"ICDs,":[123],"enhancing":[124],"effect":[126],"Moreover,":[130],"during":[131],"data":[132],"construction,":[133],"use":[135],"LVLM":[137],"intended":[138],"ICL":[140],"implementation":[141],"validate":[143,166],"strength":[145],"each":[147],"sequence,":[149],"resulting":[150],"model-specific":[153],"ICD-LM":[157,202],"trained":[158],"this":[160],"model-specific.":[164],"We":[165],"our":[167],"methodology":[168],"experiments":[170],"Visual":[172],"Question":[173],"Answering":[174],"Image":[176],"Captioning,":[177],"confirming":[178],"viability":[180],"configuration.":[188],"comprehensive":[190],"ablation":[191],"further":[193],"explore":[194],"impact":[196],"construction":[200],"development":[203],"settings":[204],"on":[205],"outcomes.":[207],"The":[208],"code":[209],"given":[211],"https://github.com/ForJadeForest/ICD-LM.":[213]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4389982500","counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-01-04T13:47:42.307288","created_date":"2023-12-20"}