{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T07:48:51Z","timestamp":1724140131052},"reference-count":66,"publisher":"SPIE-Intl Soc Optical Eng","issue":"01","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Electron. Imag."],"published-print":{"date-parts":[[2024,1,24]]},"DOI":"10.1117\/1.jei.33.1.013028","type":"journal-article","created":{"date-parts":[[2024,1,24]],"date-time":"2024-01-24T07:49:20Z","timestamp":1706082560000},"source":"Crossref","is-referenced-by-count":1,"title":["TEG: image theme recognition using text-embedding-guided few-shot adaptation"],"prefix":"10.1117","volume":"33","author":[{"given":"Jikai","family":"Wang","sequence":"first","affiliation":[{"name":"Wenzhou University, Key Laboratory of Intelligent Informatics for Safety and Emergency of Zhejiang Province, Wenzhou, China"}]},{"given":"Wanglong","family":"Lu","sequence":"additional","affiliation":[{"name":"Memorial University of Newfoundland, Department of Computer Science, St. John\u2019s, Newfoundland, Canada"}]},{"given":"Yu","family":"Wang","sequence":"additional","affiliation":[{"name":"Wenzhou University, Key Laboratory of Intelligent Informatics for Safety and Emergency of Zhejiang Province, Wenzhou, China"}]},{"given":"Kaijie","family":"Shi","sequence":"additional","affiliation":[{"name":"Memorial University of Newfoundland, Department of Computer Science, St. John\u2019s, Newfoundland, Canada"}]},{"given":"Xianta","family":"Jiang","sequence":"additional","affiliation":[{"name":"Memorial University of Newfoundland, Department of Computer Science, St. John\u2019s, Newfoundland, Canada"}]},{"given":"Hanli","family":"Zhao","sequence":"additional","affiliation":[{"name":"Wenzhou University, Key Laboratory of Intelligent Informatics for Safety and Emergency of Zhejiang Province, Wenzhou, China"}]}],"member":"189","reference":[{"key":"r1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-018-5910-z"},{"key":"r2","article-title":"Theme definition","year":"2023"},{"key":"r3","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"r4","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"r5","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"r6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"r7","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"r8","first-page":"147","article-title":"A combined corner and edge detector","author":"Harris","year":"1988"},{"key":"r9","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2018.03.064"},{"key":"r10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.177"},{"key":"r11","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"r12","doi-asserted-by":"publisher","DOI":"10.1007\/11744023_32"},{"key":"r13","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.08.030"},{"key":"r14","first-page":"2980","article-title":"Mask R-CNN","author":"He","year":"2017"},{"key":"r15","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.31.6.063057"},{"key":"r16","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.31.6.063022"},{"key":"r17","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.30.1.013017"},{"key":"r18","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"r19","doi-asserted-by":"publisher","DOI":"10.1002\/ima.22428"},{"key":"r20","doi-asserted-by":"publisher","DOI":"10.1007\/s11390-022-2131-8"},{"key":"r21","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.29.6.063004"},{"key":"r22","doi-asserted-by":"publisher","DOI":"10.1117\/1.JEI.31.3.033044"},{"key":"r23","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2015"},{"key":"r24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"r25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"r26","first-page":"6105","article-title":"EfficientNet: rethinking model scaling for convolutional neural networks","author":"Tan","year":"2019"},{"key":"r27","article-title":"Attention is all you need","author":"Vaswani","year":"2017"},{"key":"r28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"r29","article-title":"An image is worth 16x16 words: transformers for image recognition at scale","author":"Dosovitskiy","year":"2021"},{"key":"r30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"r31","first-page":"10012","article-title":"Swin transformer: hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"r32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"r33","first-page":"7794","article-title":"Non-local neural networks","author":"Wang","year":"2018"},{"key":"r34","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-023-0364-2"},{"key":"r35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01055"},{"key":"r36","article-title":"Beit: BERT pre-training of image transformers","author":"Bao","year":"2022"},{"key":"r37","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","author":"Jia","year":"2021"},{"key":"r38","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126658"},{"key":"r39","first-page":"1","article-title":"Contrastive learning of medical visual representations from paired images and text","author":"Zhang","year":"2022"},{"key":"r40","first-page":"15638","article-title":"FLAVA: a foundational language and vision alignment model","author":"Singh","year":"2022"},{"key":"r41","article-title":"Florence: a new foundation model for computer vision","author":"Yuan","year":"2021"},{"key":"r42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"r43","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"Chen","year":"2020"},{"key":"r44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"r45","first-page":"15659","article-title":"Prompt-aligned gradient for prompt tuning","author":"Zhu","year":"2022"},{"key":"r46","article-title":"Test-time prompt tuning for zero-shot generalization in vision-language models","author":"Shu","year":"2022"},{"key":"r47","first-page":"30569","article-title":"Dualcoop: fast adaptation to multi-label recognition with limited annotations","author":"Sun","year":"2022"},{"key":"r48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00780"},{"key":"r49","article-title":"Music theme recognition using CNN and self-attention","author":"Sukhavasi","year":"2019"},{"key":"r50","article-title":"SELAB-HCMUS at MediaEval 2021: music theme and emotion classification with co-teaching training strategy","author":"Pham","year":"2021"},{"key":"r51","first-page":"1","article-title":"Theme based clustering of tweets","author":"Tripathy","year":"2014"},{"key":"r52","doi-asserted-by":"publisher","DOI":"10.17507\/jltr.1105.13"},{"key":"r53","first-page":"1","article-title":"Efficient domain adaptation for painting theme recognition","author":"Badea","year":"2017"},{"key":"r54","first-page":"3013","article-title":"Theme-based multi-class object recognition and segmentation","author":"Wu","year":"2010"},{"key":"r55","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019021"},{"key":"r56","article-title":"Clipcap: clip prefix for image captioning","author":"Mokady","year":"2021"},{"key":"r57","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"},{"key":"r58","doi-asserted-by":"crossref","article-title":"Towards end-to-end license plate detection and recognition: a large dataset and baseline","author":"Xu","year":"2018","DOI":"10.1007\/978-3-030-01261-8_16"},{"key":"r59","first-page":"722","article-title":"Automated flower classification over a large number of classes","author":"Nilsback","year":"2008"},{"key":"r60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299023"},{"key":"r61","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2723009"},{"key":"r62","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2018.161"},{"key":"r63","article-title":"The definition of theme categories","year":"2023"},{"key":"r64","first-page":"11976","article-title":"A convnet for the 2020s","author":"Liu","year":"2022"},{"key":"r65","first-page":"12934","article-title":"Efficientformer: vision transformers at mobilenet speed","author":"Li","year":"2022"},{"key":"r66","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_29"}],"container-title":["Journal of Electronic Imaging"],"original-title":[],"link":[{"URL":"https:\/\/www.spiedigitallibrary.org\/journalArticle\/Download?urlId=10.1117%2f1.JEI.33.1.013028","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,31]],"date-time":"2024-01-31T01:16:55Z","timestamp":1706663815000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.spiedigitallibrary.org\/journals\/journal-of-electronic-imaging\/volume-33\/issue-01\/013028\/TEG--image-theme-recognition-using-text-embedding-guided-few\/10.1117\/1.JEI.33.1.013028.full"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,24]]},"references-count":66,"journal-issue":{"issue":"01","published-online":{"date-parts":[[2024,1,1]]}},"URL":"https:\/\/doi.org\/10.1117\/1.jei.33.1.013028","relation":{},"ISSN":["1017-9909"],"issn-type":[{"value":"1017-9909","type":"print"}],"subject":[],"published":{"date-parts":[[2024,1,24]]}}}