{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T13:40:08Z","timestamp":1727703608003},"reference-count":63,"publisher":"Springer Science and Business Media LLC","issue":"23","license":[{"start":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T00:00:00Z","timestamp":1726617600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T00:00:00Z","timestamp":1726617600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1007\/s10489-024-05774-7","type":"journal-article","created":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T17:05:52Z","timestamp":1726765552000},"page":"12492-12504","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Adaptive multimodal prompt for human-object interaction with local feature enhanced transformer"],"prefix":"10.1007","volume":"54","author":[{"given":"Kejun","family":"Xue","sequence":"first","affiliation":[]},{"given":"Yongbin","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Zhijun","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Xiaoyan","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Wenjun","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Mingxuan","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Chenmou","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,18]]},"reference":[{"key":"5774_CR1","doi-asserted-by":"crossref","unstructured":"Fouhey DF, Kuo WC, Efros AA, Malik J (2018) From lifestyle vlogs to everyday interactions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4991\u20135000","DOI":"10.1109\/CVPR.2018.00524"},{"key":"5774_CR2","doi-asserted-by":"crossref","unstructured":"Li S, Du Y, Torralba A, Sivic J, Russell B (2021) Weakly supervised human-object interaction detection in video via contrastive spatiotemporal regions. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1845\u20131855","DOI":"10.1109\/ICCV48922.2021.00186"},{"key":"5774_CR3","doi-asserted-by":"crossref","unstructured":"Morais R, Le V, Venkatesh S, Tran T (2021) Learning asynchronous and sparse human-object interaction in videos. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 16041\u201316050","DOI":"10.1109\/CVPR46437.2021.01578"},{"issue":"6","key":"5774_CR4","doi-asserted-by":"publisher","first-page":"2799","DOI":"10.1109\/TIP.2018.2890749","volume":"28","author":"Z Tu","year":"2019","unstructured":"Tu Z, Li H, Zhang D, Dauwels J, Li B, Yuan J (2019) Action-stage emphasized spatiotemporal vlad for video action recognition. IEEE Trans Image Process 28(6):2799\u20132812","journal-title":"IEEE Trans Image Process"},{"issue":"5","key":"5774_CR5","doi-asserted-by":"publisher","first-page":"1423","DOI":"10.1109\/TCSVT.2018.2830102","volume":"29","author":"Z Tu","year":"2018","unstructured":"Tu Z, Xie W, Dauwels J, Li B, Yuan J (2018) Semantic cues enhanced multimodality multistream cnn for action recognition. IEEE Transactions on Circuits and Systems for Video Technology 29(5):1423\u20131437","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"5774_CR6","doi-asserted-by":"crossref","unstructured":"Wang S, Duan Y, Ding H, Tan YP, Yap KH, Yuan J (2022) Learning transferable human-object interaction detector with natural language supervision. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 939\u2013948","DOI":"10.1109\/CVPR52688.2022.00101"},{"key":"5774_CR7","first-page":"37416","volume":"35","author":"H Yuan","year":"2022","unstructured":"Yuan H, Jiang J, Albanie S, Feng T, Huang Z, Ni D, Tang M (2022) Rlip: relational language-image pre-training for human-object interaction detection. Adv Neural Inf Process Syst 35:37416\u201337431","journal-title":"Adv Neural Inf Process Syst"},{"key":"5774_CR8","doi-asserted-by":"crossref","unstructured":"Liao Y, Zhang A, Lu M, Wang Y, Li X, Liu S (2022) Gen-vlkt: Simplify association and enhance interaction understanding for hoi detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 20123\u201320132","DOI":"10.1109\/CVPR52688.2022.01949"},{"key":"5774_CR9","doi-asserted-by":"crossref","unstructured":"Pan M, Shen H (2024) Multimodal variational contrastive learning for few-shot classification. Applied Intelligence, 1\u201314","DOI":"10.1007\/s10489-024-05269-5"},{"issue":"22","key":"5774_CR10","doi-asserted-by":"publisher","first-page":"27191","DOI":"10.1007\/s10489-023-04890-0","volume":"53","author":"H Liao","year":"2023","unstructured":"Liao H, Wang Q, Zhao S, Xing T, Hu R (2023) Domain consensual contrastive learning for few-shot universal domain adaptation. Appl Intell 53(22):27191\u201327206","journal-title":"Appl Intell"},{"issue":"22","key":"5774_CR11","doi-asserted-by":"publisher","first-page":"27207","DOI":"10.1007\/s10489-023-04971-0","volume":"53","author":"H Kan","year":"2023","unstructured":"Kan H, Yu J, Huang J, Liu Z, Wang H, Zhou H (2023) Self-supervised group meiosis contrastive learning for eeg-based emotion recognition. Appl Intell 53(22):27207\u201327225","journal-title":"Appl Intell"},{"key":"5774_CR12","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1162\/tacl_a_00468","volume":"10","author":"E Ben-David","year":"2022","unstructured":"Ben-David E, Oved N, Reichart R (2022) Pada: example-based prompt learning for on-the-fly adaptation to unseen domains. Trans Assoc Comput Linguist 10:414\u2013433","journal-title":"Trans Assoc Comput Linguist"},{"key":"5774_CR13","unstructured":"Liu X, Zheng Y, Du Z, Ding M, Qian Y, Yang Z, Tang J (2021) Gpt understands, too. arXiv preprint arXiv:2103.10385"},{"key":"5774_CR14","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J et al (2021) Learning transferable visual models from natural language supervision. In: International conference on machine learning, pp 8748\u20138763. PMLR"},{"key":"5774_CR15","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et al (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"5774_CR16","doi-asserted-by":"crossref","unstructured":"Jia M, Tang L, Chen BC, Cardie C, Belongie S, Hariharan B, Lim SN (2022) Visual prompt tuning. In: European conference on computer vision, pp 709\u2013727. Springer","DOI":"10.1007\/978-3-031-19827-4_41"},{"key":"5774_CR17","doi-asserted-by":"crossref","unstructured":"Wang Z, Zhang Z, Lee CY, Zhang H, Sun R, Ren X, Su G, Perot V, Dy J, Pfister T (2022) Learning to prompt for continual learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 139\u2013149","DOI":"10.1109\/CVPR52688.2022.00024"},{"key":"5774_CR18","unstructured":"Zang Y, Li W, Zhou K, Huang C, Loy CC (2022) Unified vision and language prompt learning. arXiv preprint arXiv:2210.07225"},{"issue":"12","key":"5774_CR19","doi-asserted-by":"publisher","first-page":"15352","DOI":"10.1007\/s10489-022-04282-w","volume":"53","author":"F Yang","year":"2023","unstructured":"Yang F, Zhang QX, Ding XJ, Ma FM, Cao J, Tong DY (2023) Semantic preserving asymmetric discrete hashing for cross-modal retrieval. Appl Intell 53(12):15352\u201315371","journal-title":"Appl Intell"},{"key":"5774_CR20","doi-asserted-by":"crossref","unstructured":"Cho K, Van\u00a0Merri\u00ebnboer B, Bahdanau D, Bengio Y (2014) On the properties of neural machine translation: encoder-decoder approaches. arXiv preprint arXiv:1409.1259","DOI":"10.3115\/v1\/W14-4012"},{"key":"5774_CR21","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"5774_CR22","doi-asserted-by":"crossref","unstructured":"Chao YW, Wang Z, He Y, Wang J, Deng J (2015) Hico: A benchmark for recognizing human-object interactions in images. In: Proceedings of the IEEE international conference on computer vision, pp 1017\u20131025","DOI":"10.1109\/ICCV.2015.122"},{"key":"5774_CR23","doi-asserted-by":"crossref","unstructured":"Wang S, Yap KH, Yuan J, Tan YP (2020) Discovering human interactions with novel objects via zero-shot learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 11652\u201311661","DOI":"10.1109\/CVPR42600.2020.01167"},{"key":"5774_CR24","doi-asserted-by":"publisher","first-page":"114135","DOI":"10.1016\/j.psychres.2021.114135","volume":"304","author":"J Sarzynska-Wawer","year":"2021","unstructured":"Sarzynska-Wawer J, Wawer A, Pawlak A, Szymanowska J, Stefaniak I, Jarkiewicz M, Okruszek L (2021) Detecting formal thought disorder by deep contextualized word representations. Psychiatry Res 304:114135","journal-title":"Psychiatry Res"},{"issue":"20","key":"5774_CR25","doi-asserted-by":"publisher","first-page":"24142","DOI":"10.1007\/s10489-023-04808-w","volume":"53","author":"N Wu","year":"2023","unstructured":"Wu N, Kera H, Kawamoto K (2023) Improving zero-shot action recognition using human instruction with text description. Appl Intell 53(20):24142\u201324156","journal-title":"Appl Intell"},{"issue":"9","key":"5774_CR26","doi-asserted-by":"publisher","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","volume":"130","author":"K Zhou","year":"2022","unstructured":"Zhou K, Yang J, Loy CC, Liu Z (2022) Learning to prompt for vision-language models. Int J Comput Vision 130(9):2337\u20132348","journal-title":"Int J Comput Vision"},{"key":"5774_CR27","doi-asserted-by":"crossref","unstructured":"Zhou K, Yang J, Loy CC, Liu Z (2022) Conditional prompt learning for vision-language models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 16816\u201316825","DOI":"10.1109\/CVPR52688.2022.01631"},{"key":"5774_CR28","doi-asserted-by":"crossref","unstructured":"Rao Y, Zhao W, Chen G, Tang Y, Zhu Z, Huang G, Zhou J, Lu J (2022) Denseclip: language-guided dense prediction with context-aware prompting. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 18082\u201318091","DOI":"10.1109\/CVPR52688.2022.01755"},{"key":"5774_CR29","doi-asserted-by":"crossref","unstructured":"Khattak MU, Rasheed H, Maaz M, Khan S, Khan FS (2023) Maple: multi-modal prompt learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 19113\u201319122","DOI":"10.1109\/CVPR52729.2023.01832"},{"key":"5774_CR30","doi-asserted-by":"crossref","unstructured":"Gao C, Xu J, Zou Y, Huang JB (2020) Drg: dual relation graph for human-object interaction detection. In: Computer vision\u2013ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XII 16, pp. 696\u2013712. Springer","DOI":"10.1007\/978-3-030-58610-2_41"},{"key":"5774_CR31","doi-asserted-by":"crossref","unstructured":"Gkioxari G, Girshick R, Doll\u00e1r P, He K (2018) Detecting and recognizing human-object interactions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8359\u20138367","DOI":"10.1109\/CVPR.2018.00872"},{"key":"5774_CR32","first-page":"5011","volume":"33","author":"YL Li","year":"2020","unstructured":"Li YL, Liu X, Wu X, Li Y, Lu C (2020) Hoi analysis: integrating and decomposing human-object interaction. Adv Neural Inf Process Syst 33:5011\u20135022","journal-title":"Adv Neural Inf Process Syst"},{"key":"5774_CR33","doi-asserted-by":"crossref","unstructured":"Qi S, Wang W, Jia B, Shen J, Zhu SC (2018) Learning human-object interactions by graph parsing neural networks. In: Proceedings of the European Conference on Computer Vision (ECCV), pp 401\u2013417","DOI":"10.1007\/978-3-030-01240-3_25"},{"key":"5774_CR34","doi-asserted-by":"crossref","unstructured":"Wang T, Anwer RM, Khan MH, Khan FS, Pang Y, Shao L, Laaksonen J (2019) Deep contextual attention for human-object interaction detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 5694\u20135702","DOI":"10.1109\/ICCV.2019.00579"},{"key":"5774_CR35","doi-asserted-by":"crossref","unstructured":"Xiao T, Fan Q, Gutfreund D, Monfort M, Oliva A, Zhou B (2019) Reasoning about human-object interactions through dual attention networks. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 3919\u20133928","DOI":"10.1109\/ICCV.2019.00402"},{"key":"5774_CR36","doi-asserted-by":"crossref","unstructured":"Hou Z, Yu B, Qiao Y, Peng X, Tao D (2021) Affordance transfer learning for human-object interaction detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 495\u2013504","DOI":"10.1109\/CVPR46437.2021.00056"},{"key":"5774_CR37","doi-asserted-by":"crossref","unstructured":"Huynh D, Elhamifar E (2021) Interaction compass: multi-label zero-shot learning of human-object interactions via spatial relations. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 8472\u20138483","DOI":"10.1109\/ICCV48922.2021.00836"},{"key":"5774_CR38","doi-asserted-by":"publisher","first-page":"10460","DOI":"10.1609\/aaai.v34i07.6616","volume":"34","author":"A Bansal","year":"2020","unstructured":"Bansal A, Rambhatla SS, Shrivastava A, Chellappa R (2020) Detecting human-object interactions via functional generalization. Proceedings of the AAAI Conference on Artificial Intelligence 34:10460\u201310469","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"5774_CR39","doi-asserted-by":"crossref","unstructured":"Gupta T, Schwing A, Hoiem D (2019) No-frills human-object interaction detection: factorization, layout encodings, and training techniques. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9677\u20139685","DOI":"10.1109\/ICCV.2019.00977"},{"key":"5774_CR40","doi-asserted-by":"crossref","unstructured":"Peyre J, Laptev I, Schmid C, Sivic J (2019) Detecting unseen visual relations using analogies. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 1981\u20131990","DOI":"10.1109\/ICCV.2019.00207"},{"key":"5774_CR41","doi-asserted-by":"crossref","unstructured":"Yuan H, Zhang S, Wang X, Albanie S, Pan Y, Feng T, Jiang J, Ni D, Zhang Y, Zhao D (2023) Rlipv2: fast scaling of relational language-image pre-training. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 21649\u201321661","DOI":"10.1109\/ICCV51070.2023.01979"},{"key":"5774_CR42","doi-asserted-by":"crossref","unstructured":"Ning S, Qiu L, Liu Y, He X (2023) Hoiclip: efficient knowledge transfer for hoi detection with vision-language models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 23507\u201323517","DOI":"10.1109\/CVPR52729.2023.02251"},{"key":"5774_CR43","doi-asserted-by":"crossref","unstructured":"Hou Z, Peng X, Qiao Y, Tao D (2020) Visual compositional learning for human-object interaction detection. In: Computer vision\u2013ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, proceedings, Part XV 16, pp 584\u2013600 Springer","DOI":"10.1007\/978-3-030-58555-6_35"},{"key":"5774_CR44","doi-asserted-by":"crossref","unstructured":"Hou Z, Yu B, Qiao Y, Peng X, Tao D (2021) Detecting human-object interaction via fabricated compositional learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14646\u201314655","DOI":"10.1109\/CVPR46437.2021.01441"},{"key":"5774_CR45","doi-asserted-by":"crossref","unstructured":"Liu Y, Yuan J, Chen CW (2020) Consnet: learning consistency graph for zero-shot human-object interaction detection. In: Proceedings of the 28th ACM international conference on multimedia, pp 4235\u20134243","DOI":"10.1145\/3394171.3413600"},{"key":"5774_CR46","unstructured":"Jia C, Yang Y, Xia Y, Chen YT, Parekh Z, Pham H, Le Q, Sung YH, Li Z, Duerig T (2021) Scaling up visual and vision-language representation learning with noisy text supervision. In: International conference on machine learning, pp 4904\u20134916. PMLR"},{"key":"5774_CR47","doi-asserted-by":"crossref","unstructured":"Du Y, Wei F, Zhang Z, Shi M, Gao Y, Li G (2022) Learning to prompt for open-vocabulary object detection with vision-language model. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14084\u201314093","DOI":"10.1109\/CVPR52688.2022.01369"},{"key":"5774_CR48","doi-asserted-by":"crossref","unstructured":"Feng C, Zhong Y, Jie Z, Chu X, Ren H, Wei X, Xie W, Ma L (2022) Promptdet: towards open-vocabulary detection using uncurated images. In: European conference on computer vision, pp 701\u2013717. Springer","DOI":"10.1007\/978-3-031-20077-9_41"},{"key":"5774_CR49","doi-asserted-by":"crossref","unstructured":"Cao Y, Tang Q, Yang F, Su X, You S, Lu X, Xu C (2023) Re-mine, learn and reason: exploring the cross-modal semantic correlations for language-guided hoi detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 23492\u201323503","DOI":"10.1109\/ICCV51070.2023.02147"},{"key":"5774_CR50","doi-asserted-by":"publisher","first-page":"3206","DOI":"10.1609\/aaai.v36i3.20229","volume":"36","author":"H Yuan","year":"2022","unstructured":"Yuan H, Wang M, Ni D, Xu L (2022) Detecting human-object interactions with object-guided cross-modal calibrated semantics. Proceedings of the AAAI Conference on Artificial Intelligence 36:3206\u20133214","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"5774_CR51","doi-asserted-by":"crossref","unstructured":"Zhao L, Yuan L, Gong B, Cui Y, Schroff F, Yang MH, Adam H, Liu T (2023) Unified visual relationship detection with vision and language models. arXiv preprint arXiv:2303.08998","DOI":"10.1109\/ICCV51070.2023.00641"},{"key":"5774_CR52","unstructured":"Li L, Xiao J, Chen G, Shao J, Zhuang Y, Chen L (2023) Zero-shot visual relation detection via composite visual cues from large language models. arXiv preprint arXiv:2305.12476"},{"key":"5774_CR53","doi-asserted-by":"crossref","unstructured":"Wu M, Gu J, Shen Y, Lin M, Chen C, Sun X (2023) End-to-end zero-shot hoi detection via vision and language knowledge distillation. Proceedings of the AAAI Conference on Artificial Intelligence 37:2839\u20132846","DOI":"10.1609\/aaai.v37i3.25385"},{"key":"5774_CR54","doi-asserted-by":"crossref","unstructured":"Zong D, Sun S (2023) Zero-shot human\u2013object interaction detection via similarity propagation. IEEE Transactions on Neural Networks and Learning Systems","DOI":"10.1109\/TNNLS.2023.3309104"},{"key":"5774_CR55","doi-asserted-by":"crossref","unstructured":"Li Z, An G (2022) Human-object interaction prediction with natural language supervision. In: 2022 16th IEEE International Conference on Signal Processing (ICSP), vol 1, pp 124\u2013128. IEEE","DOI":"10.1109\/ICSP56322.2022.9965210"},{"key":"5774_CR56","unstructured":"Devlin J, Chang MW, Lee K, Toutanova K (2018) Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"5774_CR57","unstructured":"Liu Y, Ott M, Goyal N, Du J, Joshi M, Chen D, Levy O, Lewis M, Zettlemoyer L, Stoyanov V (2019) Roberta: a robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692"},{"key":"5774_CR58","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"5774_CR59","unstructured":"Loshchilov I, Hutter F (2017) Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101"},{"key":"5774_CR60","doi-asserted-by":"crossref","unstructured":"Gao J, Yap KH, Wu K, Phan DT, Garg K, Han BS (2024) Contextual human object interaction understanding from pre-trained large language model. In: ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 13436\u201313440. IEEE","DOI":"10.1109\/ICASSP48485.2024.10447511"},{"key":"5774_CR61","doi-asserted-by":"crossref","unstructured":"Pratt S, Yatskar M, Weihs L, Farhadi A, Kembhavi A (2020) Grounded situation recognition. In: Computer vision\u2013ECCV 2020: 16th European conference, Glasgow, UK, August 23\u201328, 2020, proceedings, Part IV 16, pp 314\u2013332. Springer","DOI":"10.1007\/978-3-030-58548-8_19"},{"key":"5774_CR62","doi-asserted-by":"crossref","unstructured":"Wang S, Yap KH, Ding H, Wu J, Yuan J, Tan YP (2021) Discovering human interactions with large-vocabulary objects via query and multi-scale detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 13475\u201313484","DOI":"10.1109\/ICCV48922.2021.01322"},{"key":"5774_CR63","doi-asserted-by":"crossref","unstructured":"Tamura M, Ohashi H, Yoshinaga T (2021) Qpic: query-based pairwise human-object interaction detection with image-wide contextual information. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10410\u201310419","DOI":"10.1109\/CVPR46437.2021.01027"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05774-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-024-05774-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05774-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T13:13:55Z","timestamp":1727702035000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-024-05774-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,18]]},"references-count":63,"journal-issue":{"issue":"23","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["5774"],"URL":"https:\/\/doi.org\/10.1007\/s10489-024-05774-7","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2024,9,18]]},"assertion":[{"value":"12 August 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 September 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Informed consent has been obtained from Shanghai University of Engineering Science for the publication of this article, as well as from all authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical and Informed Consent for Data Used"}},{"value":"The corresponding author of this paper holds the role of associate editor at Applied Intelligence.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}}]}}