{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T04:30:45Z","timestamp":1730349045342,"version":"3.28.0"},"publisher-location":"Cham","reference-count":54,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031733895","type":"print"},{"value":"9783031733901","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T00:00:00Z","timestamp":1730332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73390-1_17","type":"book-chapter","created":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T16:24:01Z","timestamp":1730305441000},"page":"284-300","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["NL2Contact: Natural Language Guided 3D Hand-Object Contact Modeling with\u00a0Diffusion Model"],"prefix":"10.1007","author":[{"given":"Zhongqun","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Hengfei","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Ziwei","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Yihua","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"Angela","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Hyung Jin","family":"Chang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,31]]},"reference":[{"key":"17_CR1","unstructured":"https:\/\/openai.com\/blog\/chatgpt\/"},{"key":"17_CR2","doi-asserted-by":"crossref","unstructured":"Ahn, H., Ha, T., Choi, Y., Yoo, H., Oh, S.: Text2action: generative adversarial synthesis from language to action. In: ICRA (2018)","DOI":"10.1109\/ICRA.2018.8460608"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Brahmbhatt, S., Ham, C., Kemp, C.C., Hays, J.: ContactDB: analyzing and predicting grasp contact via thermal imaging. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00891"},{"key":"17_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1007\/978-3-030-58601-0_22","volume-title":"Computer Vision \u2013 ECCV 2020","author":"S Brahmbhatt","year":"2020","unstructured":"Brahmbhatt, S., Tang, C., Twigg, C.D., Kemp, C.C., Hays, J.: ContactPose: a dataset of grasps with object contact and hand pose. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12358, pp. 361\u2013378. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58601-0_22"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Calli, B., Singh, A., Walsman, A., Srinivasa, S., Abbeel, P., Dollar, A.M.: The ycb object and model set: towards common benchmarks for manipulation research. In: ICAR (2015)","DOI":"10.1109\/ICAR.2015.7251504"},{"key":"17_CR6","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"Cheang, C., Lin, H., Fu, Y., Xue, X.: Learning 6-dof object poses to grasp category-level objects by language instructions. In: ICRA (2022)","DOI":"10.1109\/ICRA46639.2022.9811367"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"Corona, E., Pumarola, A., Alenya, G., Moreno-Noguer, F., Rogez, G.: Ganhand: Predicting human grasp affordances in multi-object scenes. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00508"},{"key":"17_CR9","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Grady, P., Tang, C., Twigg, C.D., Vo, M., Brahmbhatt, S., Kemp, C.C.: ContactOpt: Optimizing contact to improve grasps. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00152"},{"key":"17_CR11","doi-asserted-by":"crossref","unstructured":"Guo, C., et al.: Generating diverse and natural 3d human motions from text. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00509"},{"key":"17_CR12","unstructured":"Ha, H., Florence, P., Song, S.: Scaling up and distilling down: Language-guided robot skill acquisition. CoRL (2023)"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Hampali, S., Rad, M., Oberweger, M., Lepetit, V.: Honnotate: a method for 3D annotation of hand and object poses. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00326"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Hasson, Y., Varol, G., Laptev, I., Schmid, C.: Towards unconstrained joint hand-object reconstruction from RGB videos. In: 3DV (2021)","DOI":"10.1109\/3DV53792.2021.00075"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Hasson, Y., Varol, G., Tzionas, D., Kalevatykh, I., Black, M.J., Laptev, I., Schmid, C.: Learning joint reconstruction of hands and manipulated objects. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01208"},{"key":"17_CR16","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. NeurIPS (2020)"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Jian, J., Liu, X., Li, M., Hu, R., Liu, J.: Affordpose: a large-scale dataset of hand-object interactions with affordance-driven hand pose. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01352"},{"key":"17_CR18","doi-asserted-by":"crossref","unstructured":"Jiang, H., Liu, S., Wang, J., Wang, X.: Hand-object contact consistency reasoning for human grasps generation. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01092"},{"key":"17_CR19","doi-asserted-by":"crossref","unstructured":"Karunratanakul, K., Preechakul, K., Suwajanakorn, S., Tang, S.: Guided motion diffusion for controllable human motion synthesis. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00205"},{"key":"17_CR20","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: ICLR (2015)"},{"key":"17_CR21","first-page":"22199","volume":"35","author":"T Kojima","year":"2022","unstructured":"Kojima, T., Gu, S.S., Reid, M., Matsuo, Y., Iwasawa, Y.: Large language models are zero-shot reasoners. Adv. Neural. Inf. Process. Syst. 35, 22199\u201322213 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Kong, H., Gong, K., Lian, D., Mi, M.B., Wang, X.: Priority-centric human motion generation in discrete latent space. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01360"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Lakshmipathy, A.S., Feng, N., Lee, Y.X., Mahler, M., Pollard, N.: Contact edit: Artist tools for intuitive modeling of hand-object interactions. ACM Trans. Graph. (TOG) (2023)","DOI":"10.1145\/3592117"},{"key":"17_CR24","doi-asserted-by":"crossref","unstructured":"Li, H., Lin, X., Zhou, Y., Li, X., Huo, Y., Chen, J., Ye, Q.: Contact2grasp: 3d grasp synthesis via hand-object contact constraint. IJCAI (2022)","DOI":"10.24963\/ijcai.2023\/117"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Liu, N., Li, S., Du, Y., Torralba, A., Tenenbaum, J.B.: Compositional visual generation with composable diffusion models. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19790-1_26"},{"key":"17_CR26","doi-asserted-by":"crossref","unstructured":"Liu, S., Jiang, H., Xu, J., Liu, S., Wang, X.: Semi-supervised 3D hand-object poses estimation with interactions in time. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01445"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Liu, S., Zhou, Y., Yang, J., Gupta, S., Wang, S.: Contactgen: Generative contact modeling for grasp generation. In: CVPR (2023)","DOI":"10.1109\/ICCV51070.2023.01884"},{"key":"17_CR28","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Hoi4d: a 4d egocentric dataset for category-level human-object interaction. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.02034"},{"key":"17_CR29","doi-asserted-by":"crossref","unstructured":"Pavlakos, G., et al.: Expressive body capture: 3d hands, face, and body from a single image. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.01123"},{"key":"17_CR30","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: PointNet: deep learning on point sets for 3D classification and segmentation. In: CVPR (2017)"},{"key":"17_CR31","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: PointNet++: deep hierarchical feature learning on point sets in a metric space. In: NeurIPS (2017)"},{"key":"17_CR32","doi-asserted-by":"publisher","unstructured":"Qin, Y., et al.: Dexmv: Imitation learning for dexterous manipulation from human videos. In: ECCV, pp. 570\u2013587 (2022). https:\/\/doi.org\/10.1007\/978-3-031-19842-7_33","DOI":"10.1007\/978-3-031-19842-7_33"},{"key":"17_CR33","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"17_CR34","doi-asserted-by":"crossref","unstructured":"Romero, J., Tzionas, D., Black, M.J.: Embodied hands: modeling and capturing hands and bodies together. ACM Trans. Graph. (ToG) (2017)","DOI":"10.1145\/3130800.3130883"},{"key":"17_CR35","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation. In: MICCAI (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"17_CR36","doi-asserted-by":"crossref","unstructured":"Sener, F., et al.: Assembly101: a large-scale multi-view video dataset for understanding procedural activities. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.02042"},{"key":"17_CR37","doi-asserted-by":"crossref","unstructured":"Taheri, O., Ghorbani, N., Black, M.J., Tzionas, D.: GRAB: a dataset of whole-body human grasping of objects. In: ECCV (2020)","DOI":"10.1007\/978-3-030-58548-8_34"},{"key":"17_CR38","doi-asserted-by":"crossref","unstructured":"Tang, C., Huang, D., Ge, W., Liu, W., Zhang, H.: Graspgpt: Leveraging semantic knowledge from a large language model for task-oriented grasping. IEEE Robotics and Automation Letters (2023)","DOI":"10.1109\/LRA.2023.3320012"},{"key":"17_CR39","doi-asserted-by":"crossref","unstructured":"Tendulkar, P., Sur\u00eds, D., Vondrick, C.: Flex: full-body grasping without full-body grasps. In: CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.02029"},{"key":"17_CR40","doi-asserted-by":"crossref","unstructured":"Tse, T.H.E., Kim, K.I., Leonardis, A., Chang, H.J.: Collaborative learning for hand and object reconstruction with attention-guided graph convolution. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00171"},{"key":"17_CR41","unstructured":"Tse, T.H.E., et\u00a0al.: Spectral graphormer: spectral graph-based transformer for egocentric two-hand reconstruction using multi-view color images. In: ICCV (2023)"},{"key":"17_CR42","doi-asserted-by":"crossref","unstructured":"Tse, T.H.E., Zhang, Z., Kim, K.I., Leonardis, A., Zheng, F., Chang, H.J.: S2Contact: graph-based network for 3d hand-object contact estimation with semi-supervised learning. In: ECCV (2022)","DOI":"10.1007\/978-3-031-19769-7_33"},{"key":"17_CR43","unstructured":"Van Den\u00a0Oord, A., Vinyals, O., et\u00a0al.: Neural discrete representation learning. NeurIPS (2017)"},{"key":"17_CR44","unstructured":"Wang, H., Zhang, Z., Cheng, Y., Chang, H.J.: High-fidelity eye animatable neural radiance fields for human face. BMVC (2023)"},{"key":"17_CR45","doi-asserted-by":"crossref","unstructured":"Wang, H., Zhang, Z., Cheng, Y., Chang, H.J.: Textgaze: gaze-controllable face generation with natural language. MM (2024)","DOI":"10.1145\/3664647.3681252"},{"key":"17_CR46","doi-asserted-by":"crossref","unstructured":"Wu, Y., Wang, J., Zhang, Y., Zhang, S., Hilliges, O., Yu, F., Tang, S.: Saga: Stochastic whole-body grasping with contact. In: ECCV (2022)","DOI":"10.1007\/978-3-031-20068-7_15"},{"key":"17_CR47","doi-asserted-by":"crossref","unstructured":"Xie, W., Zhao, Z., Li, S., Zuo, B., Wang, Y.: Nonrigid object contact estimation with regional unwrapping transformer. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.00857"},{"key":"17_CR48","doi-asserted-by":"crossref","unstructured":"Yang, L., et al.: Oakink: a large-scale knowledge repository for understanding hand-object interaction. In: CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.02028"},{"key":"17_CR49","doi-asserted-by":"crossref","unstructured":"Yang, L., Zhan, X., Li, K., Xu, W., Li, J., Lu, C.: CPF: Learning a contact potential field to model the hand-object interaction. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01091"},{"key":"17_CR50","doi-asserted-by":"crossref","unstructured":"Ye, Y., Hebbar, P., Gupta, A., Tulsiani, S.: Diffusion-guided reconstruction of everyday hand-object interaction clips. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01806"},{"key":"17_CR51","unstructured":"Yu, Z., Yang, L., Xie, Y., Chen, P., Yao, A.: Uv-based 3d hand-object reconstruction with grasp optimization. BMVC (2022)"},{"key":"17_CR52","doi-asserted-by":"crossref","unstructured":"Zhang, H., Ye, Y., Shiratori, T., Komura, T.: Manipnet: neural manipulation synthesis with a hand-object spatial representation. ACM Trans. Graph. (ToG) (2021)","DOI":"10.1145\/3476576.3476690"},{"key":"17_CR53","doi-asserted-by":"crossref","unstructured":"Zhou, K., Bhatnagar, B.L., Lenssen, J.E., Pons-Moll, G.: TOCH: Spatio-temporal object-to-hand correspondence for motion refinement. In: ECCV (2022)","DOI":"10.1007\/978-3-031-20062-5_1"},{"key":"17_CR54","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Wang, J., Qin, Y., Sun, D., Jampani, V., Wang, X.: Contactart: Learning 3d interaction priors for category-level articulated object and hand poses estimation. 3DV (2024)","DOI":"10.1109\/3DV62453.2024.00028"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73390-1_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T16:33:30Z","timestamp":1730306010000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73390-1_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,31]]},"ISBN":["9783031733895","9783031733901"],"references-count":54,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73390-1_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,31]]},"assertion":[{"value":"31 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}