{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T12:41:59Z","timestamp":1726231319974},"publisher-location":"Cham","reference-count":26,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031439957"},{"type":"electronic","value":"9783031439964"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43996-4_3","type":"book-chapter","created":{"date-parts":[[2023,9,30]],"date-time":"2023-09-30T23:07:48Z","timestamp":1696115268000},"page":"24-34","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Surgical Video Captioning with\u00a0Mutual-Modal Concept Alignment"],"prefix":"10.1007","author":[{"given":"Zhen","family":"Chen","sequence":"first","affiliation":[]},{"given":"Qingyu","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Leo K. T.","family":"Yeung","sequence":"additional","affiliation":[]},{"given":"Danny T. M.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"Zhen","family":"Lei","sequence":"additional","affiliation":[]},{"given":"Hongbin","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Jinqiao","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,10,1]]},"reference":[{"key":"3_CR1","unstructured":"Allan, M., et al.: 2018 robotic scene segmentation challenge. arXiv preprint arXiv:2001.11190 (2020)"},{"key":"3_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1007\/978-3-319-46454-1_24","volume-title":"Computer Vision \u2013 ECCV 2016","author":"P Anderson","year":"2016","unstructured":"Anderson, P., Fernando, B., Johnson, M., Gould, S.: SPICE: semantic propositional image caption evaluation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 382\u2013398. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_24"},{"key":"3_CR3","unstructured":"Banerjee, S., Lavie, A.: METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In: ACL Workshop, pp. 65\u201372 (2005)"},{"issue":"2","key":"3_CR4","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1515\/cdbme-2021-2098","volume":"7","author":"R Bieck","year":"2021","unstructured":"Bieck, R., et al.: Generation of surgical reports using keyword-augmented next sequence prediction. Curr. Direct. Biomed. Eng. 7(2), 387\u2013390 (2021)","journal-title":"Curr. Direct. Biomed. Eng."},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Cornia, M., Stefanini, M., Baraldi, L., Cucchiara, R.: Meshed-memory transformer for image captioning. In: CVPR, pp. 10578\u201310587 (2020)","DOI":"10.1109\/CVPR42600.2020.01059"},{"key":"3_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1007\/978-3-030-87202-1_58","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2021","author":"T Czempiel","year":"2021","unstructured":"Czempiel, T., Paschali, M., Ostler, D., Kim, S.T., Busam, B., Navab, N.: OperA: attention-regularized transformers for surgical phase recognition. In: de Bruijne, M., et al. (eds.) MICCAI 2021. LNCS, vol. 12904, pp. 604\u2013614. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87202-1_58"},{"key":"3_CR7","unstructured":"Dosovitskiy, A., et al.: An image is worth 16$$\\times $$16 words: transformers for image recognition at scale. In: ICLR (2021)"},{"issue":"1165","key":"3_CR8","doi-asserted-by":"publisher","first-page":"820","DOI":"10.1136\/postgradmedj-2021-139862","volume":"98","author":"S Elnikety","year":"2022","unstructured":"Elnikety, S., Badr, E., Abdelaal, A.: Surgical training fit for the future: the need for a change. Postgrad. Med. J. 98(1165), 820\u2013823 (2022)","journal-title":"Postgrad. Med. J."},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Huang, L., Wang, W., Chen, J., Wei, X.Y.: Attention on attention for image captioning. In: ICCV, pp. 4634\u20134643 (2019)","DOI":"10.1109\/ICCV.2019.00473"},{"key":"3_CR10","unstructured":"Khosla, P., et al.: Supervised contrastive learning. In: NeurIPS, vol. 33, pp. 18661\u201318673 (2020)"},{"key":"3_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1007\/978-3-031-16449-1_48","volume-title":"Medical Image Computing and Computer Assisted Intervention - MICCAI 2022","author":"C Lin","year":"2022","unstructured":"Lin, C., Zheng, S., Liu, Z., Li, Y., Zhu, Z., Zhao, Y.: SGT: scene graph-guided transformer for surgical report generation. In: Wang, L., Dou, Q., Fletcher, P.T., Speidel, S., Li, S. (eds.) MICCAI 2022. LNCS, vol. 13437, pp. 507\u2013518. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-16449-1_48"},{"key":"3_CR12","unstructured":"Lin, C.Y.: ROUGE: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381 (2004)"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Lin, K., et al.: SwinBERT: end-to-end transformers with sparse attention for video captioning. In: CVPR, pp. 17949\u201317958 (2022)","DOI":"10.1109\/CVPR52688.2022.01742"},{"key":"3_CR14","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: ICCV, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Video swin transformer. In: CVPR, pp. 3202\u20133211 (2022)","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Loper, E., Bird, S.: NLTK: the natural language toolkit. arXiv preprint cs\/0205028 (2002)","DOI":"10.3115\/1118108.1118117"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Madani, A., et al.: Artificial intelligence for intraoperative guidance: using semantic segmentation to identify surgical anatomy during laparoscopic cholecystectomy. Ann. Surg. (2020)","DOI":"10.1097\/SLA.0000000000004594"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Nwoye, C.I., et al.: CholecTriplet 2021: a benchmark challenge for surgical action triplet recognition. Med. Image Anal. 86, 102803 (2023)","DOI":"10.1016\/j.media.2023.102803"},{"key":"3_CR19","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: BLEU: a method for automatic evaluation of machine translation. In: ACL, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"3_CR20","unstructured":"Paszke, A., et al.: PyTorch: an imperative style, high-performance deep learning library. arXiv preprint arXiv:1912.01703 (2019)"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Rennie, S.J., Marcheret, E., Mroueh, Y., Ross, J., Goel, V.: Self-critical sequence training for image captioning. In: CVPR, pp. 7008\u20137024 (2017)","DOI":"10.1109\/CVPR.2017.131"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Lawrence Zitnick, C., Parikh, D.: CIDEr: consensus-based image description evaluation. In: CVPR, pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"3_CR23","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1007\/978-3-030-87202-1_26","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2021","author":"M Xu","year":"2021","unstructured":"Xu, M., Islam, M., Lim, C.M., Ren, H.: Class-incremental domain adaptation with smoothing and calibration for\u00a0surgical report generation. In: de Bruijne, M., et al. (eds.) MICCAI 2021. LNCS, vol. 12904, pp. 269\u2013278. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87202-1_26"},{"key":"3_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"376","DOI":"10.1007\/978-3-031-16449-1_36","volume-title":"Medical Image Computing and Computer Assisted Intervention - MICCAI 2022","author":"M Xu","year":"2022","unstructured":"Xu, M., Islam, M., Ren, H.: Rethinking surgical captioning: end-to-end window-based MLP transformer using patches. In: Wang, L., Dou, Q., Fletcher, P.T., Speidel, S., Li, S. (eds.) MICCAI 2022. LNCS, vol. 13437, pp. 376\u2013386. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-16449-1_36"},{"key":"3_CR25","unstructured":"Yu, J., Wang, Z., Vasudevan, V., Yeung, L., Seyedhosseini, M., Wu, Y.: CoCa: contrastive captioners are image-text foundation models. Trans. Mach. Learn. Res. (2022)"},{"key":"3_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"290","DOI":"10.1007\/978-3-030-87202-1_28","volume-title":"Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2021","author":"J Zhang","year":"2021","unstructured":"Zhang, J., Nie, Y., Chang, J., Zhang, J.J.: Surgical instruction generation with transformers. In: de Bruijne, M., et al. (eds.) MICCAI 2021. LNCS, vol. 12904, pp. 290\u2013299. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-87202-1_28"}],"container-title":["Lecture Notes in Computer Science","Medical Image Computing and Computer Assisted Intervention \u2013 MICCAI 2023"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43996-4_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,4]],"date-time":"2024-07-04T16:02:42Z","timestamp":1720108962000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43996-4_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031439957","9783031439964"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43996-4_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"1 October 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MICCAI","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Medical Image Computing and Computer-Assisted Intervention","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vancouver, BC","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Canada","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 October 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 October 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"miccai2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/conferences.miccai.org\/2023\/en\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2250","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"730","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"32% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}