{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T05:08:55Z","timestamp":1733807335651,"version":"3.30.1"},"publisher-location":"Singapore","reference-count":46,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819609710","type":"print"},{"value":"9789819609727","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0972-7_18","type":"book-chapter","created":{"date-parts":[[2024,12,9]],"date-time":"2024-12-09T08:06:33Z","timestamp":1733731593000},"page":"309-324","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Amodal Instance Segmentation with\u00a0Diffusion Shape Prior Estimation"],"prefix":"10.1007","author":[{"given":"Minh","family":"Tran","sequence":"first","affiliation":[]},{"given":"Khoa","family":"Vo","sequence":"additional","affiliation":[]},{"given":"Tri","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Ngan","family":"Le","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,10]]},"reference":[{"key":"18_CR1","unstructured":"Amit, T., Shaharbany, T., Nachmani, E., Wolf, L.: Segdiff: Image segmentation with diffusion probabilistic models. arXiv preprint arXiv:2112.00390 (2021)"},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Back, S., Lee, J., Kim, T., Noh, S., Kang, R., Bak, S., Lee, K.: Unseen object amodal instance segmentation via hierarchical occlusion modeling. In: ICRA. pp. 5085\u20135092. IEEE (2022)","DOI":"10.1109\/ICRA46639.2022.9811646"},{"key":"18_CR3","unstructured":"Baranchuk, D., Rubachev, I., Voynov, A., Khrulkov, V., Babenko, A.: Label-efficient semantic segmentation with diffusion models. arXiv preprint arXiv:2112.03126 (2021)"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Brooks, T., Holynski, A., Efros, A.A.: Instructpix2pix: Learning to follow image editing instructions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 18392\u201318402 (2023)","DOI":"10.1109\/CVPR52729.2023.01764"},{"key":"18_CR5","first-page":"8780","volume":"34","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. Adv. Neural. Inf. Process. Syst. 34, 8780\u20138794 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"4","key":"18_CR6","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1037\/0096-3445.113.4.501","volume":"113","author":"J Duncan","year":"1984","unstructured":"Duncan, J.: Selective attention and the organization of visual information. J. Exp. Psychol. Gen. 113(4), 501 (1984)","journal-title":"J. Exp. Psychol. Gen."},{"key":"18_CR7","doi-asserted-by":"crossref","unstructured":"Follmann, P., K\u00f6nig, R., H\u00e4rtinger, P., Klostermann, M., B\u00f6ttger, T.: Learning to see the invisible: End-to-end trainable amodal instance segmentation. In: WACV. pp. 1328\u20131336. IEEE (2019)","DOI":"10.1109\/WACV.2019.00146"},{"key":"18_CR8","unstructured":"Gal, R., Alaluf, Y., Atzmon, Y., Patashnik, O., Bermano, A.H., Chechik, G., Cohen-Or, D.: An image is worth one word: Personalizing text-to-image generation using textual inversion. arXiv preprint arXiv:2208.01618 (2022)"},{"key":"18_CR9","doi-asserted-by":"crossref","unstructured":"Gao, J., Qian, X., Wang, Y., Xiao, T., He, T., Zhang, Z., Fu, Y.: Coarse-to-fine amodal segmentation with shape prior. In: ICCV. pp. 1262\u20131271 (2023)","DOI":"10.1109\/ICCV51070.2023.00122"},{"key":"18_CR10","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. Advances in neural information processing systems 27 (2014)"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: ICCV. pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"18_CR13","first-page":"6840","volume":"33","author":"J Ho","year":"2020","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. Adv. Neural. Inf. Process. Syst. 33, 6840\u20136851 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR14","unstructured":"Ho, J., Salimans, T.: Classifier-free diffusion guidance. arXiv preprint arXiv:2207.12598 (2022)"},{"key":"18_CR15","unstructured":"Jang, W.D., Wei, D., Zhang, X., Leahy, B., Yang, H., Tompkin, J., Ben-Yosef, D., Needleman, D., Pfister, H.: Learning vector quantized shape code for amodal blastomere instance segmentation. arXiv preprint arXiv:2012.00985 (2020)"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Ke, L., Danelljan, M., Li, X., Tai, Y.W., Tang, C.K., Yu, F.: Mask transfiner for high-quality instance segmentation. In: CVPR. pp. 4412\u20134421 (2022)","DOI":"10.1109\/CVPR52688.2022.00437"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Ke, L., Tai, Y.W., Tang, C.K.: Deep occlusion-aware instance segmentation with overlapping bilayers. In: CVPR. pp. 4019\u20134028 (2021)","DOI":"10.1109\/CVPR46437.2021.00401"},{"issue":"2","key":"18_CR18","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1016\/0010-0285(91)90009-D","volume":"23","author":"PJ Kellman","year":"1991","unstructured":"Kellman, P.J., Shipley, T.F.: A theory of visual interpolation in object perception. Cogn. Psychol. 23(2), 141\u2013221 (1991)","journal-title":"Cogn. Psychol."},{"key":"18_CR19","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"18_CR20","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Li, K., Malik, J.: Amodal instance segmentation. In: ECCV. pp. 677\u2013693. Springer (2016)","DOI":"10.1007\/978-3-319-46475-6_42"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: ECCV. pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Mohan, R., Valada, A.: Amodal panoptic segmentation. In: CVPR. pp. 21023\u201321032 (2022)","DOI":"10.1109\/CVPR52688.2022.02035"},{"key":"18_CR24","unstructured":"Nguyen, Q., Vu, T., Tran, A., Nguyen, K.: Dataset diffusion: Diffusion-based synthetic data generation for pixel-level semantic segmentation. Advances in Neural Information Processing Systems 36 (2024)"},{"key":"18_CR25","unstructured":"Nichol, A., Dhariwal, P., Ramesh, A., Shyam, P., Mishkin, P., McGrew, B., Sutskever, I., Chen, M.: Glide: Towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741 (2021)"},{"key":"18_CR26","doi-asserted-by":"crossref","unstructured":"Ozguroglu, E., Liu, R., Sur\u00eds, D., Chen, D., Dave, A., Tokmakov, P., Vondrick, C.: pix2gestalt: Amodal segmentation by synthesizing wholes. arXiv preprint arXiv:2401.14398 (2024)","DOI":"10.1109\/CVPR52733.2024.00377"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Qi, L., Jiang, L., Liu, S., Shen, X., Jia, J.: Amodal instance segmentation with kins dataset. In: CVPR. pp. 3014\u20133023 (2019)","DOI":"10.1109\/CVPR.2019.00313"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"18_CR29","unstructured":"Ruder, S.: An overview of gradient descent optimization algorithms. arXiv preprint arXiv:1609.04747 (2016)"},{"key":"18_CR30","doi-asserted-by":"crossref","unstructured":"Ruiz, N., Li, Y., Jampani, V., Pritch, Y., Rubinstein, M., Aberman, K.: Dreambooth: Fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 22500\u201322510 (2023)","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"Schneider, N., Piewak, F., Stiller, C., Franke, U.: Regnet: Multimodal sensor registration using deep neural networks. In: 2017 IEEE intelligent vehicles symposium (IV). pp. 1803\u20131810. IEEE (2017)","DOI":"10.1109\/IVS.2017.7995968"},{"key":"18_CR32","first-page":"25278","volume":"35","author":"C Schuhmann","year":"2022","unstructured":"Schuhmann, C., Beaumont, R., Vencu, R., Gordon, C., Wightman, R., Cherti, M., Coombes, T., Katta, A., Mullis, C., Wortsman, M., et al.: Laion-5b: An open large-scale dataset for training next generation image-text models. Adv. Neural. Inf. Process. Syst. 35, 25278\u201325294 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR33","unstructured":"Schuhmann, C., Vencu, R., Beaumont, R., Kaczmarczyk, R., Mullis, C., Katta, A., Coombes, T., Jitsev, J., Komatsuzaki, A.: Laion-400m: Open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:2111.02114 (2021)"},{"key":"18_CR34","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. arXiv preprint arXiv:2010.02502 (2020)"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: Fcos: Fully convolutional one-stage object detection. In: ICCV. pp. 9627\u20139636 (2019)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"18_CR36","doi-asserted-by":"crossref","unstructured":"Tran, M., Bounsavy, W., Vo, K., Nguyen, A., Nguyen, T., Le, N.: Shapeformer: Shape prior visible-to-amodal transformer-based amodal instance segmentation. arXiv preprint arXiv:2403.11376 (2024)","DOI":"10.1109\/IJCNN60899.2024.10650837"},{"key":"18_CR37","unstructured":"Tran, M., Vo, K., Yamazaki, K., Fernandes, A., Kidd, M., Le, N.: Aisformer: Amodal instance segmentation with transformer. arXiv preprint arXiv:2210.06323 (2022)"},{"key":"18_CR38","unstructured":"Wu, Y., Kirillov, A., Massa, F., Lo, W.Y., Girshick, R.: Detectron2. https:\/\/github.com\/facebookresearch\/detectron2 (2019)"},{"key":"18_CR39","doi-asserted-by":"crossref","unstructured":"Xiao, Y., Xu, Y., Zhong, Z., Luo, W., Li, J., Gao, S.: Amodal segmentation based on visible region segmentation and shape prior. arXiv preprint arXiv:2012.05598 (2020)","DOI":"10.1609\/aaai.v35i4.16407"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Xiao, Y., Xu, Y., Zhong, Z., Luo, W., Li, J., Gao, S.: Amodal segmentation based on visible region segmentation and shape prior. In: AAAI. vol.\u00a035, pp. 2995\u20133003 (2021)","DOI":"10.1609\/aaai.v35i4.16407"},{"key":"18_CR41","doi-asserted-by":"crossref","unstructured":"Xu, J., Liu, S., Vahdat, A., Byeon, W., Wang, X., De\u00a0Mello, S.: Open-vocabulary panoptic segmentation with text-to-image diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 2955\u20132966 (2023)","DOI":"10.1109\/CVPR52729.2023.00289"},{"key":"18_CR42","doi-asserted-by":"crossref","unstructured":"Xu, K., Zhang, L., Shi, J.: Amodal completion via progressive mixed context diffusion. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 9099\u20139109 (2024)","DOI":"10.1109\/CVPR52733.2024.00869"},{"key":"18_CR43","first-page":"6278","volume":"35","author":"J Yao","year":"2022","unstructured":"Yao, J., Hong, Y., Wang, C., Xiao, T., He, T., Locatello, F., Wipf, D.P., Fu, Y., Zhang, Z.: Self-supervised amodal video object segmentation. NeurIPS 35, 6278\u20136291 (2022)","journal-title":"Self-supervised amodal video object segmentation. NeurIPS"},{"key":"18_CR44","doi-asserted-by":"crossref","unstructured":"Zhan, G., Zheng, C., Xie, W., Zisserman, A.: Amodal ground truth and completion in the wild. arXiv preprint arXiv:2312.17247 (2023)","DOI":"10.1109\/CVPR52733.2024.02645"},{"key":"18_CR45","doi-asserted-by":"crossref","unstructured":"Zhan, G., Zheng, C., Xie, W., Zisserman, A.: Amodal ground truth and completion in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 28003\u201328013 (2024)","DOI":"10.1109\/CVPR52733.2024.02645"},{"key":"18_CR46","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Tian, Y., Metaxas, D., Doll\u00e1r, P.: Semantic amodal segmentation. In: CVPR. pp. 1464\u20131472 (2017)","DOI":"10.1109\/CVPR.2017.320"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0972-7_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,9]],"date-time":"2024-12-09T09:10:04Z","timestamp":1733735404000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0972-7_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,10]]},"ISBN":["9789819609710","9789819609727"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0972-7_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,10]]},"assertion":[{"value":"10 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}