{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,2]],"date-time":"2024-07-02T15:02:41Z","timestamp":1719932561503},"reference-count":42,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2020,7,1]],"date-time":"2020-07-01T00:00:00Z","timestamp":1593561600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Journal of Visual Communication and Image Representation"],"published-print":{"date-parts":[[2020,7]]},"DOI":"10.1016\/j.jvcir.2020.102790","type":"journal-article","created":{"date-parts":[[2020,3,11]],"date-time":"2020-03-11T07:12:29Z","timestamp":1583910749000},"page":"102790","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":2,"special_numbering":"C","title":["Learning latent geometric consistency for 6D object pose estimation in heavily cluttered scenes"],"prefix":"10.1016","volume":"70","author":[{"given":"Qingnan","family":"Li","sequence":"first","affiliation":[]},{"given":"Ruimin","family":"Hu","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-0833-5679","authenticated-orcid":false,"given":"Jing","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"Zhongyuan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Chen","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.jvcir.2020.102790_b0005","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"2686","article-title":"Render for cnn: viewpoint estimation in images using cnns trained with rendered 3d model views","author":"Su","year":"2015"},{"key":"10.1016\/j.jvcir.2020.102790_b0010","doi-asserted-by":"crossref","unstructured":"M. Schwarz, H. Schulz, S. Behnke, Rgb-d object recognition and pose estimation based on pre-trained convolutional neural network features, in: 2015 IEEE international conference on robotics and automation (ICRA), IEEE, 2015, pp. 1329\u20131335.","DOI":"10.1109\/ICRA.2015.7139363"},{"key":"10.1016\/j.jvcir.2020.102790_b0015","doi-asserted-by":"crossref","unstructured":"S. Tulsiani, J. Malik, Viewpoints and keypoints, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2015, pp. 1510\u20131519.","DOI":"10.1109\/CVPR.2015.7298758"},{"key":"10.1016\/j.jvcir.2020.102790_b0020","doi-asserted-by":"crossref","unstructured":"Y. Xiang, T. Schmidt, V. Narayanan, D. Fox, Posecnn: A convolutional neural network for 6d object pose estimation in cluttered scenes, arXiv preprint arXiv:1711.00199 (2017).","DOI":"10.15607\/RSS.2018.XIV.019"},{"key":"10.1016\/j.jvcir.2020.102790_b0025","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"7074","article-title":"3d bounding box estimation using deep learning and geometry","author":"Mousavian","year":"2017"},{"key":"10.1016\/j.jvcir.2020.102790_b0030","series-title":"2018 IEEE International Conference on Robotics and Automation (ICRA), IEEE","first-page":"1","article-title":"When regression meets manifold learning for object recognition and pose estimation","author":"Bui","year":"2018"},{"key":"10.1016\/j.jvcir.2020.102790_b0035","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"652","article-title":"Pointnet: deep learning on point sets for 3d classification and segmentation","author":"Qi","year":"2017"},{"key":"10.1016\/j.jvcir.2020.102790_b0040","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"918","article-title":"Frustum pointnets for 3d object detection from rgb-d data","author":"Qi","year":"2018"},{"key":"10.1016\/j.jvcir.2020.102790_b0045","doi-asserted-by":"crossref","first-page":"1284","DOI":"10.1177\/0278364911401765","article-title":"The moped framework: object recognition and pose estimation for manipulation","volume":"30","author":"Collet","year":"2011","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.jvcir.2020.102790_b0050","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1007\/s11263-005-3964-7","article-title":"Simultaneous object recognition and segmentation from single or multiple model views","volume":"67","author":"Ferrari","year":"2006","journal-title":"Int. J. Comput. Vision"},{"key":"10.1016\/j.jvcir.2020.102790_b0055","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1007\/s11263-005-3674-1","article-title":"3d object modeling and recognition using local affine-invariant image descriptors and multi-view spatial constraints","volume":"66","author":"Rothganger","year":"2006","journal-title":"Int. J. Comput. Vision"},{"key":"10.1016\/j.jvcir.2020.102790_b0060","series-title":"European Conference on Computer Vision","first-page":"408","article-title":"Discriminative mixture-of-templates for viewpoint classification","author":"Gu","year":"2010"},{"key":"10.1016\/j.jvcir.2020.102790_b0065","series-title":"IEEE Winter Conference on Applications of Computer Vision","first-page":"75","article-title":"Beyond pascal: a benchmark for 3d object detection in the wild","author":"Xiang","year":"2014"},{"key":"10.1016\/j.jvcir.2020.102790_b0070","doi-asserted-by":"crossref","first-page":"155","DOI":"10.1007\/s11263-008-0152-6","article-title":"Epnp: An accurate o (n) solution to the pnp problem","volume":"81","author":"Lepetit","year":"2009","journal-title":"Int J Comput Vision"},{"key":"10.1016\/j.jvcir.2020.102790_b0075","first-page":"726","article-title":"Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography","author":"Fischler","year":"1987","journal-title":"Readings Comput Vision"},{"key":"10.1016\/j.jvcir.2020.102790_b0080","doi-asserted-by":"crossref","unstructured":"G. Pavlakos, X. Zhou, A. Chan, K.G. Derpanis, K. Daniilidis, 6-dof object pose from semantic keypoints, in: 2017 IEEE International Conference on Robotics and Automation (ICRA), IEEE, 2017, pp. 2011\u20132018.","DOI":"10.1109\/ICRA.2017.7989233"},{"key":"10.1016\/j.jvcir.2020.102790_b0085","doi-asserted-by":"crossref","unstructured":"M. Oberweger, M. Rad, V. Lepetit, Making deep heatmaps robust to partial occlusions for 3d object pose estimation, in: Proceedings of the European Conference on Computer Vision (ECCV), 2018, pp. 119\u2013134.","DOI":"10.1007\/978-3-030-01267-0_8"},{"key":"10.1016\/j.jvcir.2020.102790_b0090","doi-asserted-by":"crossref","unstructured":"B. Tekin, S.N. Sinha, P. Fua, Real-time seamless single shot 6d object pose prediction, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 292\u2013301.","DOI":"10.1109\/CVPR.2018.00038"},{"key":"10.1016\/j.jvcir.2020.102790_b0095","unstructured":"S. Suwajanakorn, N. Snavely, J.J. Tompson, M. Norouzi, Discovery of latent 3d keypoints via end-to-end geometric reasoning, in: Advances in Neural Information Processing Systems, 2018, pp. 2059\u20132070."},{"key":"10.1016\/j.jvcir.2020.102790_b0100","doi-asserted-by":"crossref","unstructured":"S. Peng, Y. Liu, Q. Huang, X. Zhou, H. Bao, Pvnet: Pixel-wise voting network for 6dof pose estimation, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2019, pp. 4561\u20134570.","DOI":"10.1109\/CVPR.2019.00469"},{"key":"10.1016\/j.jvcir.2020.102790_b0105","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3385","article-title":"Segmentation-driven 6d object pose estimation","author":"Hu","year":"2019"},{"key":"10.1016\/j.jvcir.2020.102790_b0110","series-title":"European conference on computer vision","first-page":"536","article-title":"Learning 6d object pose estimation using 3d object coordinates","author":"Brachmann","year":"2014"},{"key":"10.1016\/j.jvcir.2020.102790_b0115","series-title":"European Conference on Computer Vision","first-page":"462","article-title":"Latent-class hough forests for 3d object detection and pose estimation","author":"Tejani","year":"2014"},{"key":"10.1016\/j.jvcir.2020.102790_b0120","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3583","article-title":"Recovering 6d object pose and predicting next-best-view in the crowd","author":"Doumanoglou","year":"2016"},{"key":"10.1016\/j.jvcir.2020.102790_b0125","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3343","article-title":"Densefusion: 6d object pose estimation by iterative dense fusion","author":"Wang","year":"2019"},{"key":"10.1016\/j.jvcir.2020.102790_b0130","series-title":"International Conference on Artificial Neural Networks","first-page":"44","article-title":"Transforming auto-encoders","author":"Hinton","year":"2011"},{"key":"10.1016\/j.jvcir.2020.102790_b0135","unstructured":"S. Sabour, N. Frosst, G.E. Hinton, Dynamic routing between capsules, in: Advances in Neural Information Processing Systems, 2017, pp. 3856\u20133866."},{"key":"10.1016\/j.jvcir.2020.102790_b0140","unstructured":"G.E. Hinton, S. Sabour, N. Frosst, Matrix capsules with em routing, 2018."},{"key":"10.1016\/j.jvcir.2020.102790_b0145","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"1009","article-title":"3d point capsule networks","author":"Zhao","year":"2019"},{"key":"10.1016\/j.jvcir.2020.102790_b0150","doi-asserted-by":"crossref","first-page":"3275","DOI":"10.1109\/TNNLS.2018.2890550","article-title":"Separability and compactness network for image recognition and superresolution","volume":"30","author":"Zhou","year":"2019","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.jvcir.2020.102790_b0155","article-title":"Multi-temporal ultra dense memory network for video super-resolution","author":"Yi","year":"2019","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.jvcir.2020.102790_b0160","article-title":"Scscn: a separated channel-spatial convolution net with attention for single-view reconstruction","author":"Ma","year":"2019","journal-title":"IEEE Trans. Industr. Electron."},{"key":"10.1016\/j.jvcir.2020.102790_b0165","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"4974","article-title":"Hybrid task cascade for instance segmentation","author":"Chen","year":"2019"},{"key":"10.1016\/j.jvcir.2020.102790_b0170","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"6409","article-title":"Mask scoring r-cnn","author":"Huang","year":"2019"},{"key":"10.1016\/j.jvcir.2020.102790_b0175","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3126","article-title":"Decoders matter for semantic segmentation: Data-dependent decoding enables flexible feature aggregation","author":"Tian","year":"2019"},{"key":"10.1016\/j.jvcir.2020.102790_b0180","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"1521","article-title":"Ssd-6d: Making rgb-based 3d detection and 6d pose estimation great again","author":"Kehl","year":"2017"},{"key":"10.1016\/j.jvcir.2020.102790_b0185","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"699","article-title":"Implicit 3d orientation learning for 6d object detection from rgb images","author":"Sundermeyer","year":"2018"},{"key":"10.1016\/j.jvcir.2020.102790_b0190","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"244","article-title":"Pointfusion: Deep sensor fusion for 3d bounding box estimation","author":"Xu","year":"2018"},{"key":"10.1016\/j.jvcir.2020.102790_b0195","doi-asserted-by":"crossref","unstructured":"S. Hinterstoisser, S. Holzer, C. Cagniart, S. Ilic, K. Konolige, N. Navab, V. Lepetit, Multimodal templates for real-time detection of texture-less objects in heavily cluttered scenes, in: 2011 international conference on computer vision, IEEE, 2011, pp. 858\u2013865.","DOI":"10.1109\/ICCV.2011.6126326"},{"key":"10.1016\/j.jvcir.2020.102790_b0200","series-title":"Asian Conference on Computer Vision","first-page":"548","article-title":"Model based training, detection and pose estimation of texture-less 3d objects in heavily cluttered scenes","author":"Hinterstoisser","year":"2012"},{"key":"10.1016\/j.jvcir.2020.102790_b0205","doi-asserted-by":"crossref","unstructured":"M. Rad, V. Lepetit, Bb8: A scalable, accurate, robust to partial occlusion method for predicting the 3d poses of challenging objects without using depth, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 3828\u20133836.","DOI":"10.1109\/ICCV.2017.413"},{"key":"10.1016\/j.jvcir.2020.102790_b0210","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"254","article-title":"A unified framework for multi-view multi-class object pose estimation","author":"Li","year":"2018"}],"container-title":["Journal of Visual Communication and Image Representation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1047320320300407?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1047320320300407?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,7,16]],"date-time":"2020-07-16T22:40:50Z","timestamp":1594939250000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1047320320300407"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7]]},"references-count":42,"alternative-id":["S1047320320300407"],"URL":"https:\/\/doi.org\/10.1016\/j.jvcir.2020.102790","relation":{},"ISSN":["1047-3203"],"issn-type":[{"value":"1047-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2020,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Learning latent geometric consistency for 6D object pose estimation in heavily cluttered scenes","name":"articletitle","label":"Article Title"},{"value":"Journal of Visual Communication and Image Representation","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.jvcir.2020.102790","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2020 Elsevier Inc. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"102790"}}