{"id":"https://openalex.org/W4400241693","doi":"https://doi.org/10.1007/s12652-024-04824-9","title":"A transformer-based Urdu image caption generation","display_name":"A transformer-based Urdu image caption generation","publication_year":2024,"publication_date":"2024-07-02","ids":{"openalex":"https://openalex.org/W4400241693","doi":"https://doi.org/10.1007/s12652-024-04824-9"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s12652-024-04824-9","pdf_url":null,"source":{"id":"https://openalex.org/S48031226","display_name":"Journal of Ambient Intelligence and Humanized Computing","issn_l":"1868-5137","issn":["1868-5137","1868-5145"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310319900"],"host_organization_lineage_names":["Springer Nature","Springer Science+Business Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/s12652-024-04824-9","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031791299","display_name":"Muhammad N.S. Hadi","orcid":"https://orcid.org/0000-0002-6490-889X"},"institutions":[{"id":"https://openalex.org/I1323252656","display_name":"Information Technology University","ror":"https://ror.org/00ngv8j44","country_code":"PK","type":"funder","lineage":["https://openalex.org/I1323252656"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Muhammad Hadi","raw_affiliation_strings":["Department of Computer Science, Information Technology University, Lahore, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Information Technology University, Lahore, Pakistan","institution_ids":["https://openalex.org/I1323252656"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036413682","display_name":"Iqra Safder","orcid":"https://orcid.org/0000-0001-9818-4693"},"institutions":[{"id":"https://openalex.org/I201384688","display_name":"National University of Computer and Emerging Sciences","ror":"https://ror.org/003eyb898","country_code":"PK","type":"funder","lineage":["https://openalex.org/I201384688"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Iqra Safder","raw_affiliation_strings":["Department of Computer Science, National University of Computer & Emerging Sciences, Lahore, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National University of Computer & Emerging Sciences, Lahore, Pakistan","institution_ids":["https://openalex.org/I201384688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051532339","display_name":"Hajra Waheed","orcid":"https://orcid.org/0000-0003-0168-0063"},"institutions":[{"id":"https://openalex.org/I201384688","display_name":"National University of Computer and Emerging Sciences","ror":"https://ror.org/003eyb898","country_code":"PK","type":"funder","lineage":["https://openalex.org/I201384688"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Hajra Waheed","raw_affiliation_strings":["Department of Computer Science, National University of Computer & Emerging Sciences, Lahore, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National University of Computer & Emerging Sciences, Lahore, Pakistan","institution_ids":["https://openalex.org/I201384688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068902908","display_name":"Farooq Zaman","orcid":"https://orcid.org/0000-0002-9861-4013"},"institutions":[{"id":"https://openalex.org/I1323252656","display_name":"Information Technology University","ror":"https://ror.org/00ngv8j44","country_code":"PK","type":"funder","lineage":["https://openalex.org/I1323252656"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Farooq Zaman","raw_affiliation_strings":["Department of Computer Science, Information Technology University, Lahore, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Information Technology University, Lahore, Pakistan","institution_ids":["https://openalex.org/I1323252656"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103081232","display_name":"Naif Radi Aljohani","orcid":"https://orcid.org/0000-0001-6150-5684"},"institutions":[{"id":"https://openalex.org/I185163786","display_name":"King Abdulaziz University","ror":"https://ror.org/02ma4wv74","country_code":"SA","type":"funder","lineage":["https://openalex.org/I185163786"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Naif Radi Aljohani","raw_affiliation_strings":["Faculty of Computing and Information Technology, King Abdulaziz University, Jeddah, Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"Faculty of Computing and Information Technology, King Abdulaziz University, Jeddah, Saudi Arabia","institution_ids":["https://openalex.org/I185163786"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087735486","display_name":"Raheel Nawaz","orcid":"https://orcid.org/0000-0001-9588-0052"},"institutions":[{"id":"https://openalex.org/I198012923","display_name":"University of Staffordshire","ror":"https://ror.org/00d6k8y35","country_code":"GB","type":"funder","lineage":["https://openalex.org/I198012923"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Raheel Nawaz","raw_affiliation_strings":["Staffordshire University, Stoke-on-Trent, UK"],"affiliations":[{"raw_affiliation_string":"Staffordshire University, Stoke-on-Trent, UK","institution_ids":["https://openalex.org/I198012923"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072423562","display_name":"Saeed\u2010Ul Hassan","orcid":"https://orcid.org/0000-0002-6509-9190"},"institutions":[{"id":"https://openalex.org/I11983389","display_name":"Manchester Metropolitan University","ror":"https://ror.org/02hstj355","country_code":"GB","type":"funder","lineage":["https://openalex.org/I11983389"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Saeed Ul Hassan","raw_affiliation_strings":["Department of Computing and Mathematics, Manchester Metropolitan University, Manchester, UK"],"affiliations":[{"raw_affiliation_string":"Department of Computing and Mathematics, Manchester Metropolitan University, Manchester, UK","institution_ids":["https://openalex.org/I11983389"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108768608","display_name":"Raheem Sarwar","orcid":null},"institutions":[{"id":"https://openalex.org/I11983389","display_name":"Manchester Metropolitan University","ror":"https://ror.org/02hstj355","country_code":"GB","type":"funder","lineage":["https://openalex.org/I11983389"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Raheem Sarwar","raw_affiliation_strings":["Faculty of Business and Law, Manchester Metropolitan University, Manchester, UK"],"affiliations":[{"raw_affiliation_string":"Faculty of Business and Law, Manchester Metropolitan University, Manchester, UK","institution_ids":["https://openalex.org/I11983389"]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5108768608"],"corresponding_institution_ids":["https://openalex.org/I11983389"],"apc_list":{"value":2690,"currency":"EUR","value_usd":3390},"apc_paid":{"value":2690,"currency":"EUR","value_usd":3390},"fwci":0.0,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":1,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":81,"max":91},"biblio":{"volume":"15","issue":"9","first_page":"3441","last_page":"3457"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9961,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9929,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/urdu","display_name":"Urdu","score":0.72669864}],"concepts":[{"id":"https://openalex.org/C2777350258","wikidata":"https://www.wikidata.org/wiki/Q1617","display_name":"Urdu","level":2,"score":0.72669864},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.60936886},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.50484663},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36405265},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32539475},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.18044627},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.16143575},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.15319437},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.05505407},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s12652-024-04824-9","pdf_url":null,"source":{"id":"https://openalex.org/S48031226","display_name":"Journal of Ambient Intelligence and Humanized Computing","issn_l":"1868-5137","issn":["1868-5137","1868-5145"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310319900"],"host_organization_lineage_names":["Springer Nature","Springer Science+Business Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1007/s12652-024-04824-9","pdf_url":null,"source":{"id":"https://openalex.org/S48031226","display_name":"Journal of Ambient Intelligence and Humanized Computing","issn_l":"1868-5137","issn":["1868-5137","1868-5145"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319965","https://openalex.org/P4310319900"],"host_organization_lineage_names":["Springer Nature","Springer Science+Business Media"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":38,"referenced_works":["https://openalex.org/W1574818812","https://openalex.org/W1861492603","https://openalex.org/W1895989618","https://openalex.org/W1933349210","https://openalex.org/W1987835821","https://openalex.org/W2124386111","https://openalex.org/W2139380585","https://openalex.org/W2161969291","https://openalex.org/W2604178507","https://openalex.org/W2886641317","https://openalex.org/W2962968835","https://openalex.org/W2963686907","https://openalex.org/W2977928982","https://openalex.org/W2990595241","https://openalex.org/W3034655362","https://openalex.org/W3037763555","https://openalex.org/W3042309530","https://openalex.org/W3082891133","https://openalex.org/W3091588028","https://openalex.org/W3100532166","https://openalex.org/W3106353077","https://openalex.org/W3107492437","https://openalex.org/W3107503524","https://openalex.org/W3107848485","https://openalex.org/W3173199533","https://openalex.org/W3198248383","https://openalex.org/W4221165890","https://openalex.org/W4237106816","https://openalex.org/W4296500582","https://openalex.org/W4306147333","https://openalex.org/W4311310217","https://openalex.org/W4363678548","https://openalex.org/W4364379010","https://openalex.org/W4385570460","https://openalex.org/W4386072307","https://openalex.org/W4386076004","https://openalex.org/W4389454722","https://openalex.org/W68733909"],"related_works":["https://openalex.org/W3116076068","https://openalex.org/W2951359407","https://openalex.org/W2775347418","https://openalex.org/W2772917594","https://openalex.org/W2755342338","https://openalex.org/W2229312674","https://openalex.org/W2166024367","https://openalex.org/W2079911747","https://openalex.org/W2058170566","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Abstract":[0],"Image":[1],"caption":[2,70],"generation":[3],"has":[4],"emerged":[5],"as":[6,42],"a":[7,83,124],"remarkable":[8],"development":[9,204],"that":[10,154],"bridges":[11],"the":[12,26,92,105,183,196,203],"gap":[13],"between":[14],"Natural":[15],"Language":[16],"Processing":[17],"(NLP)":[18],"and":[19,31,94,148,165,177,207],"Computer":[20],"Vision":[21],"(CV).":[22],"It":[23],"lies":[24],"at":[25],"intersection":[27],"of":[28,96,104,198,205],"these":[29],"fields":[30],"presents":[32],"unique":[33],"challenges,":[34],"particularly":[35],"when":[36],"dealing":[37],"with":[38,141,187],"low-resource":[39,191,211],"languages":[40],"such":[41],"Urdu.":[43],"Limited":[44],"research":[45],"on":[46,190],"basic":[47],"Urdu":[48,68,118,171],"language":[49,212],"understanding":[50],"necessitates":[51],"further":[52],"exploration":[53],"in":[54,81,113,210],"this":[55,58],"domain.":[56],"In":[57],"study,":[59],"we":[60,99],"propose":[61],"three":[62,129],"Seq2Seq-based":[63],"architectures":[64],"specifically":[65],"tailored":[66],"for":[67,201],"image":[69,172],"generation.":[71],"Our":[72,120,180,193],"approach":[73],"involves":[74],"leveraging":[75],"transformer":[76],"models":[77,122,189,200],"to":[78],"generate":[79],"captions":[80,173],"Urdu,":[82],"significantly":[84],"more":[85],"challenging":[86],"task":[87],"than":[88],"English.":[89],"To":[90],"facilitate":[91],"training":[93],"evaluation":[95],"our":[97,155],"models,":[98],"created":[100],"an":[101],"Urdu-translated":[102],"subset":[103],"flickr8k":[106],"dataset,":[107],"which":[108],"contains":[109],"images":[110],"featuring":[111],"dogs":[112],"action":[114],"accompanied":[115],"by":[116],"corresponding":[117],"captions.":[119],"designed":[121],"encompassed":[123],"deep":[125],"learning-based":[126],"approach,":[127],"utilizing":[128],"different":[130],"architectures:":[131],"Convolutional":[132],"Neural":[133],"Network":[134],"(CNN)":[135],"+":[136],"Long":[137],"Short-term":[138],"Memory":[139],"(LSTM)":[140],"Soft":[142],"attention":[143],"employing":[144],"word2Vec":[145],"embeddings,":[146],"CNN+Transformer,":[147],"Vit+Roberta":[149],"models.":[150],"Experimental":[151],"results":[152],"demonstrate":[153],"proposed":[156],"model":[157],"outperforms":[158],"existing":[159],"state-of-the-art":[160],"approaches,":[161],"achieving":[162],"86":[163],"BLEU-1":[164],"90":[166],"BERT-F1":[167],"scores.":[168],"The":[169],"generated":[170],"exhibit":[174],"syntactic,":[175],"contextual,":[176],"semantic":[178],"correctness.":[179],"study":[181],"highlights":[182],"inherent":[184],"challenges":[185],"associated":[186],"retraining":[188],"languages.":[192],"findings":[194],"highlight":[195],"potential":[197],"pre-trained":[199],"facilitating":[202],"NLP":[206],"CV":[208],"applications":[209],"settings.":[213]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4400241693","counts_by_year":[],"updated_date":"2025-02-18T11:44:24.635935","created_date":"2024-07-03"}