{"id":"https://openalex.org/W4324321291","doi":"https://doi.org/10.48550/arxiv.2303.06594","title":"ChatGPT Asks, BLIP-2 Answers: Automatic Questioning Towards Enriched Visual Descriptions","display_name":"ChatGPT Asks, BLIP-2 Answers: Automatic Questioning Towards Enriched Visual Descriptions","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4324321291","doi":"https://doi.org/10.48550/arxiv.2303.06594"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2303.06594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2303.06594","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082616743","display_name":"Deyao Zhu","orcid":"https://orcid.org/0000-0001-8014-7309"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Deyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100450148","display_name":"Jun Chen","orcid":"https://orcid.org/0000-0001-8883-0970"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076162916","display_name":"Kilichbek Haydarov","orcid":"https://orcid.org/0000-0002-3062-2228"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haydarov, Kilichbek","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020511969","display_name":"Xiaoqian Shen","orcid":"https://orcid.org/0000-0001-6284-520X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Xiaoqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100629630","display_name":"Wenxuan Zhang","orcid":"https://orcid.org/0000-0002-3947-2991"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Wenxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5085089542","display_name":"Mohamed Elhoseiny","orcid":"https://orcid.org/0000-0001-9659-1551"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elhoseiny, Mohamed","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.999882,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9791,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9583,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.7376986},{"id":"https://openalex.org/keywords/ask-price","display_name":"Ask price","score":0.5811766},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4504222}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.7376986},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6809198},{"id":"https://openalex.org/C157659113","wikidata":"https://www.wikidata.org/wiki/Q533822","display_name":"WordNet","level":2,"score":0.66038287},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.65670407},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.61368114},{"id":"https://openalex.org/C90329073","wikidata":"https://www.wikidata.org/wiki/Q914232","display_name":"Ask price","level":2,"score":0.5811766},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48691022},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.46232986},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4586042},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4504222},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42311233},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37489933},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.27318949},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09570503},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2303.06594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2303.06594","pdf_url":"http://arxiv.org/pdf/2303.06594","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2303.06594","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2303.06594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality education","score":0.75}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4388893791","https://openalex.org/W4290852288","https://openalex.org/W4283207562","https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2963177403","https://openalex.org/W2949522393","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W2330246314"],"abstract_inverted_index":{"Asking":[0],"insightful":[1],"questions":[2,54,97],"is":[3,89,117,194],"crucial":[4],"for":[5,170],"acquiring":[6,109],"knowledge":[7],"and":[8,139,141],"expanding":[9],"our":[10],"understanding":[11],"of":[12,18,40,95],"the":[13,16,37,172,183],"world.":[14],"However,":[15],"importance":[17],"questioning":[19,71],"has":[20],"been":[21,30],"largely":[22],"overlooked":[23],"in":[24,84],"AI":[25],"research,":[26],"where":[27],"models":[28,43],"have":[29],"primarily":[31],"developed":[32],"to":[33,51,67,91,100,119],"answer":[34],"questions.":[35],"With":[36],"recent":[38],"advancements":[39],"large":[41],"language":[42],"(LLMs)":[44],"like":[45],"ChatGPT,":[46],"we":[47,76],"discover":[48],"their":[49],"capability":[50],"ask":[52,92],"high-quality":[53],"when":[55],"provided":[56],"with":[57,144],"a":[58,64,79,93,102],"suitable":[59],"prompt.":[60],"This":[61],"discovery":[62],"presents":[63],"new":[65,110],"opportunity":[66],"develop":[68],"an":[69],"automatic":[70],"system.":[72],"In":[73],"this":[74],"paper,":[75],"introduce":[77],"ChatCaptioner,":[78],"novel":[80],"automatic-questioning":[81],"method":[82],"deployed":[83],"image":[85,123,131,174,184],"captioning.":[86],"Here,":[87],"ChatGPT":[88],"prompted":[90],"series":[94],"informative":[96],"about":[98],"images":[99],"BLIP-2,":[101],"strong":[103],"vision":[104],"question-answering":[105],"model.":[106],"By":[107],"keeping":[108],"visual":[111],"information":[112],"from":[113,167],"BLIP-2's":[114],"answers,":[115],"ChatCaptioner":[116,143,177],"able":[118],"generate":[120],"more":[121,159,180],"enriched":[122],"descriptions.":[124],"We":[125],"conduct":[126],"human-subject":[127],"evaluations":[128],"on":[129],"common":[130],"caption":[132],"datasets":[133],"such":[134],"as":[135,146,148,164],"COCO,":[136],"Conceptual":[137],"Caption,":[138],"WikiArt,":[140],"compare":[142],"BLIP-2":[145,186],"well":[147],"ground":[149],"truth.":[150],"Our":[151],"results":[152],"demonstrate":[153],"that":[154],"ChatCaptioner's":[155],"captions":[156],"are":[157],"significantly":[158],"informative,":[160],"receiving":[161],"three":[162],"times":[163],"many":[165],"votes":[166],"human":[168],"evaluators":[169],"providing":[171],"most":[173],"information.":[175],"Besides,":[176],"identifies":[178],"53%":[179],"objects":[181],"within":[182],"than":[185],"alone":[187],"measured":[188],"by":[189],"WordNet":[190],"synset":[191],"matching.":[192],"Code":[193],"available":[195],"at":[196],"https://github.com/Vision-CAIR/ChatCaptioner":[197]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4324321291","counts_by_year":[{"year":2024,"cited_by_count":21},{"year":2023,"cited_by_count":10}],"updated_date":"2025-01-06T19:34:57.218017","created_date":"2023-03-16"}