{"id":"https://openalex.org/W4393117268","doi":"https://doi.org/10.1049/cvi2.12280","title":"Tag\u2010inferring and tag\u2010guided Transformer for image captioning","display_name":"Tag\u2010inferring and tag\u2010guided Transformer for image captioning","publication_year":2024,"publication_date":"2024-03-22","ids":{"openalex":"https://openalex.org/W4393117268","doi":"https://doi.org/10.1049/cvi2.12280"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1049/cvi2.12280","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1049/cvi2.12280","source":{"id":"https://openalex.org/S166929102","display_name":"IET Computer Vision","issn_l":"1751-9632","issn":["1751-9632","1751-9640"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311714","host_organization_name":"Institution of Engineering and Technology","host_organization_lineage":["https://openalex.org/P4310311714"],"host_organization_lineage_names":["Institution of Engineering and Technology"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1049/cvi2.12280","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005954246","display_name":"Yaohua Yi","orcid":"https://orcid.org/0000-0003-2456-6845"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaohua Yi","raw_affiliation_strings":["School of Remote Sensing and Information Engineering Wuhan University Wuhan Hubei China"],"affiliations":[{"raw_affiliation_string":"School of Remote Sensing and Information Engineering Wuhan University Wuhan Hubei China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104263630","display_name":"Yinkai Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinkai Liang","raw_affiliation_strings":["Research Center of Digital Imaging and Intelligent Perception Wuhan University Wuhan Hubei China","School of Remote Sensing and Information Engineering Wuhan University Wuhan Hubei China"],"affiliations":[{"raw_affiliation_string":"Research Center of Digital Imaging and Intelligent Perception Wuhan University Wuhan Hubei China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"School of Remote Sensing and Information Engineering Wuhan University Wuhan Hubei China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032659335","display_name":"Dezhu Kong","orcid":null},"institutions":[],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezhu Kong","raw_affiliation_strings":["Zhuhai Pantum Electronics Co. Ltd. Zhuhai China"],"affiliations":[{"raw_affiliation_string":"Zhuhai Pantum Electronics Co. Ltd. Zhuhai China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082223646","display_name":"Ziwei Tang","orcid":"https://orcid.org/0000-0001-6780-8432"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziwei Tang","raw_affiliation_strings":["Research Center of Digital Imaging and Intelligent Perception Wuhan University Wuhan Hubei China","School of Remote Sensing and Information Engineering Wuhan University Wuhan Hubei China"],"affiliations":[{"raw_affiliation_string":"School of Remote Sensing and Information Engineering Wuhan University Wuhan Hubei China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"Research Center of Digital Imaging and Intelligent Perception Wuhan University Wuhan Hubei China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017879423","display_name":"Jibing Peng","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jibing Peng","raw_affiliation_strings":["Research Center of Digital Imaging and Intelligent Perception Wuhan University Wuhan Hubei China","Zhuhai Pantum Electronics Co. Ltd. Zhuhai China"],"affiliations":[{"raw_affiliation_string":"Zhuhai Pantum Electronics Co. Ltd. Zhuhai China","institution_ids":[]},{"raw_affiliation_string":"Research Center of Digital Imaging and Intelligent Perception Wuhan University Wuhan Hubei China","institution_ids":["https://openalex.org/I37461747"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2000,"currency":"EUR","value_usd":2200,"provenance":"doaj"},"apc_paid":{"value":2000,"currency":"EUR","value_usd":2200,"provenance":"doaj"},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9945,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9746,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9443703},{"id":"https://openalex.org/keywords/image-captioning","display_name":"Image Captioning","score":0.6538},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language Understanding","score":0.516634},{"id":"https://openalex.org/keywords/feature-matching","display_name":"Feature Matching","score":0.51441},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image Retrieval","score":0.502079}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9443703},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.655471},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6447078},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5650866},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.45991436},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45627278},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3729601},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13070145},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.08315796},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1049/cvi2.12280","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1049/cvi2.12280","source":{"id":"https://openalex.org/S166929102","display_name":"IET Computer Vision","issn_l":"1751-9632","issn":["1751-9632","1751-9640"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311714","host_organization_name":"Institution of Engineering and Technology","host_organization_lineage":["https://openalex.org/P4310311714"],"host_organization_lineage_names":["Institution of Engineering and Technology"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1049/cvi2.12280","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1049/cvi2.12280","source":{"id":"https://openalex.org/S166929102","display_name":"IET Computer Vision","issn_l":"1751-9632","issn":["1751-9632","1751-9640"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311714","host_organization_name":"Institution of Engineering and Technology","host_organization_lineage":["https://openalex.org/P4310311714"],"host_organization_lineage_names":["Institution of Engineering and Technology"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.41,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[{"funder":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China","award_id":"2021YFB2206200"}],"datasets":[],"versions":[],"referenced_works_count":25,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1897761818","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W2133459682","https://openalex.org/W2506483933","https://openalex.org/W2550553598","https://openalex.org/W2575842049","https://openalex.org/W2612690371","https://openalex.org/W2745461083","https://openalex.org/W2942992487","https://openalex.org/W2963084599","https://openalex.org/W2963101956","https://openalex.org/W2986670728","https://openalex.org/W3034655362","https://openalex.org/W3034733309","https://openalex.org/W3035160838","https://openalex.org/W3035284526","https://openalex.org/W4220842168","https://openalex.org/W4289542422","https://openalex.org/W4301409532","https://openalex.org/W4309701236","https://openalex.org/W4387521427"],"related_works":["https://openalex.org/W4388184981","https://openalex.org/W4323777661","https://openalex.org/W4310447809","https://openalex.org/W4298897568","https://openalex.org/W4290852288","https://openalex.org/W4200243030","https://openalex.org/W3215212336","https://openalex.org/W3209117276","https://openalex.org/W3164229987","https://openalex.org/W2800782462"],"abstract_inverted_index":{"Abstract":[0],"Image":[1],"captioning":[2,56],"is":[3,65,125,138],"an":[4],"important":[5],"task":[6],"for":[7,41,54,143],"understanding":[8],"images.":[9],"Recently,":[10],"many":[11],"studies":[12],"have":[13,35],"used":[14],"tags":[15,34,70,79,155],"to":[16,57,77,98,111,120,140,156],"build":[17],"alignments":[18],"between":[19],"image":[20,43,55,113],"information":[21],"and":[22,51,107,117,160,171],"language":[23,118],"information.":[24,83],"However,":[25],"existing":[26],"methods":[27],"ignore":[28],"the":[29,38,46,69,73,86,100,105,128,136,144,150,176],"problem":[30],"that":[31,94,149,161],"simple":[32],"semantic":[33,82,123],"difficulty":[36],"expressing":[37],"detailed":[39],"semantics":[40],"different":[42],"contents.":[44],"Therefore,":[45],"authors":[47],"propose":[48],"a":[49,62,91,167],"tag\u2010inferring":[50,63],"tag\u2010guided":[52,92],"Transformer":[53],"generate":[58,141],"fine\u2010grained":[59],"captions.":[60],"First,":[61],"encoder":[64],"proposed,":[66],"which":[67],"uses":[68],"extracted":[71],"by":[72],"scene":[74],"graph":[75],"model":[76],"infer":[78],"with":[80,85,166],"deeper":[81],"Then,":[84],"obtained":[87],"deep":[88],"tag":[89,115],"information,":[90],"decoder":[93],"includes":[95],"short\u2010term":[96],"attention":[97,110],"improve":[99],"features":[101,116,119,124],"of":[102,132],"words":[103],"in":[104,135],"sentence":[106],"gated":[108],"cross\u2010modal":[109],"combine":[112,154],"features,":[114],"produce":[121],"informative":[122],"proposed.":[126],"Finally,":[127],"word":[129],"probability":[130],"distribution":[131],"all":[133],"positions":[134],"sequence":[137],"calculated":[139],"descriptions":[142],"image.":[145],"The":[146],"experiments":[147],"demonstrate":[148],"authors\u2019":[151],"method":[152],"can":[153],"obtain":[157],"precise":[158],"captions":[159],"it":[162],"achieves":[163],"competitive":[164],"performance":[165],"40.6%":[168],"BLEU\u20104":[169],"score":[170,174],"135.3%":[172],"CIDEr":[173],"on":[175],"MSCOCO":[177],"data":[178],"set.":[179]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4393117268","counts_by_year":[],"updated_date":"2024-12-05T12:32:59.688622","created_date":"2024-03-24"}