{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,7,28]],"date-time":"2024-07-28T02:16:30Z","timestamp":1722132990573},"reference-count":84,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2021,1]]},"DOI":"10.1016\/j.neucom.2020.08.014","type":"journal-article","created":{"date-parts":[[2020,9,3]],"date-time":"2020-09-03T01:07:05Z","timestamp":1599095225000},"page":"9-22","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":7,"special_numbering":"C","title":["CASINet: Content-Adaptive Scale Interaction Networks for scene parsing"],"prefix":"10.1016","volume":"419","author":[{"given":"Xin","family":"Jin","sequence":"first","affiliation":[]},{"given":"Cuiling","family":"Lan","sequence":"additional","affiliation":[]},{"given":"Wenjun","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"Zhizheng","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Zhibo","family":"Chen","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neucom.2020.08.014_b0005","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1016\/j.neucom.2019.02.003","article-title":"Survey on semantic segmentation using deep learning techniques","volume":"338","author":"Lateef","year":"2019","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0010","doi-asserted-by":"crossref","first-page":"34","DOI":"10.1016\/j.neucom.2019.01.103","article-title":"Aleatoric uncertainty estimation with test-time augmentation for medical image segmentation with convolutional neural networks","volume":"338","author":"Wang","year":"2019","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0015","article-title":"Weakly supervised semantic segmentation by iterative superpixel-crf refinement with initial clues guiding","author":"Li","year":"2020","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0020","article-title":"Mfenet: Multi-level feature enhancement network for real-time semantic segmentation","author":"Zhang","year":"2020","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0025","unstructured":"K. Sun, Y. Zhao, B. Jiang, T. Cheng, B. Xiao, D. Liu, Y. Mu, X. Wang, W. Liu, J. Wang, High-resolution representations for labeling pixels and regions, arXiv preprint arXiv:1904.04514, 2019."},{"key":"10.1016\/j.neucom.2020.08.014_b0030","unstructured":"X. Li, L. Zhang, A. You, M. Yang, K. Yang, Y. Tong, Global aggregation then local distribution in fully convolutional networks, BMVC, 2019."},{"key":"10.1016\/j.neucom.2020.08.014_b0035","doi-asserted-by":"crossref","first-page":"128","DOI":"10.1016\/j.neucom.2015.06.114","article-title":"Video object segmentation based on supervoxel for multimedia corpus construction","volume":"215","author":"Tian","year":"2016","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0040","first-page":"992","article-title":"Transductive video segmentation on tree-structured model","volume":"27","author":"Wang","year":"2016","journal-title":"IEEE TCSVT"},{"key":"10.1016\/j.neucom.2020.08.014_b0045","first-page":"1491","article-title":"Interactive video segmentation via local appearance model","volume":"27","author":"Sun","year":"2016","journal-title":"IEEE TCSVT"},{"key":"10.1016\/j.neucom.2020.08.014_b0050","series-title":"CVPR","first-page":"8856","article-title":"Improving semantic segmentation via video propagation and label relaxation","author":"Zhu","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0055","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1016\/j.neucom.2019.01.003","article-title":"Refined video segmentation through global appearance regression","volume":"334","author":"Zhang","year":"2019","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0060","doi-asserted-by":"crossref","first-page":"20","DOI":"10.1016\/j.neucom.2019.11.019","article-title":"Deep clustering for weakly-supervised semantic segmentation in autonomous driving scenes","volume":"381","author":"Wang","year":"2020","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0065","doi-asserted-by":"crossref","first-page":"316","DOI":"10.1016\/j.neucom.2018.06.059","article-title":"Road segmentation for all-day outdoor robot navigation","volume":"314","author":"Zhang","year":"2018","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0070","doi-asserted-by":"crossref","unstructured":"Y.J. Park, Y. Yang, H. Ro, J. Byun, S. Chae, T.D. Han, Meet ar-bot: Meeting anywhere, anytime with movable spatial ar robot, in: 2018 ACM Multimedia, ACM, 2018, pp. 1242\u20131243.","DOI":"10.1145\/3240508.3241390"},{"key":"10.1016\/j.neucom.2020.08.014_b0075","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1016\/j.neucom.2019.04.001","article-title":"Weakly supervised precise segmentation for historical document images","volume":"350","author":"Xie","year":"2019","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0080","doi-asserted-by":"crossref","first-page":"476","DOI":"10.1016\/j.neucom.2018.11.004","article-title":"A multimodal fusion approach for image captioning","volume":"329","author":"Zhao","year":"2019","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0085","series-title":"CVPR","first-page":"3431","article-title":"Fully convolutional networks for semantic segmentation","author":"Long","year":"2015"},{"key":"10.1016\/j.neucom.2020.08.014_b0090","doi-asserted-by":"crossref","unstructured":"R. Zhang, S. Tang, Y. Zhang, J. Li, S. Yan, Scale-adaptive convolutions for scene parsing, in: ICCV, 2017, pp. 2031\u20132039.","DOI":"10.1109\/ICCV.2017.224"},{"key":"10.1016\/j.neucom.2020.08.014_b0095","series-title":"CVPR","first-page":"2881","article-title":"Pyramid scene parsing network","author":"Zhao","year":"2017"},{"key":"10.1016\/j.neucom.2020.08.014_b0100","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","article-title":"Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs","volume":"40","author":"Chen","year":"2018","journal-title":"IEEE TPAMI"},{"key":"10.1016\/j.neucom.2020.08.014_b0105","series-title":"CVPR","first-page":"3684","article-title":"Denseaspp for semantic segmentation in street scenes","author":"Yang","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0110","unstructured":"L.-C. Chen, G. Papandreou, F. Schroff, H. Adam, Rethinking atrous convolution for semantic image segmentation, arXiv preprint arXiv:1706.05587, 2017."},{"key":"10.1016\/j.neucom.2020.08.014_b0115","unstructured":"L.-C. Chen, G. Papandreou, I. Kokkinos, K. Murphy, A.L. Yuille, Semantic image segmentation with deep convolutional nets and fully connected crfs, ICLR, 2015."},{"key":"10.1016\/j.neucom.2020.08.014_b0120","series-title":"CVPR","first-page":"7794","article-title":"Non-local neural networks","author":"Wang","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0125","series-title":"CVPR","first-page":"3213","article-title":"The cityscapes dataset for semantic urban scene understanding","author":"Cordts","year":"2016"},{"key":"10.1016\/j.neucom.2020.08.014_b0130","series-title":"CVPR","first-page":"633","article-title":"Scene parsing through ade20k dataset","author":"Zhou","year":"2017"},{"key":"10.1016\/j.neucom.2020.08.014_b0135","series-title":"CVPR","first-page":"932","article-title":"Look into person: Self-supervised structure-sensitive learning and a new benchmark for human parsing","author":"Gong","year":"2017"},{"key":"10.1016\/j.neucom.2020.08.014_b0140","first-page":"2169","article-title":"Beyond bags of features: spatial pyramid matching for recognizing natural scene categories","volume":"2","author":"Lazebnik","year":"2006","journal-title":"CVPR"},{"key":"10.1016\/j.neucom.2020.08.014_b0145","series-title":"ECCV","first-page":"715","article-title":"Pyramid dilated deeper convlstm for video salient object detection","author":"Song","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0150","series-title":"CVPR","first-page":"1448","article-title":"Salient object detection with pyramid attention and salient edges","author":"Wang","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0155","series-title":"CVPR","first-page":"4353","article-title":"Large kernel matters\u2013improve semantic segmentation by global convolutional network","author":"Peng","year":"2017"},{"key":"10.1016\/j.neucom.2020.08.014_b0160","series-title":"CVPR","first-page":"1925","article-title":"Refinenet: Multi-path refinement networks for high-resolution semantic segmentation","author":"Lin","year":"2017"},{"key":"10.1016\/j.neucom.2020.08.014_b0165","series-title":"CVPR","first-page":"2393","article-title":"Context contrasted feature and gated multi-scale aggregation for scene segmentation","author":"Ding","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0170","doi-asserted-by":"crossref","first-page":"1480","DOI":"10.1109\/TPAMI.2017.2712691","article-title":"Scene segmentation with dag-recurrent neural networks","volume":"40","author":"Shuai","year":"2018","journal-title":"IEEE TPAMI"},{"key":"10.1016\/j.neucom.2020.08.014_b0175","series-title":"CVPR","first-page":"7151","article-title":"Context encoding for semantic segmentation","author":"Zhang","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0180","series-title":"CVPR","first-page":"82","article-title":"Auto-deeplab: Hierarchical neural architecture search for semantic image segmentation","author":"Liu","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0185","first-page":"60","article-title":"A non-local algorithm for image denoising","volume":"2","author":"Buades","year":"2005","journal-title":"CVPR"},{"key":"10.1016\/j.neucom.2020.08.014_b0190","first-page":"2080","article-title":"Image denoising by sparse 3-d transform-domain collaborative filtering","volume":"16","author":"Dabov","year":"2007","journal-title":"IEEE TIP"},{"key":"10.1016\/j.neucom.2020.08.014_b0195","doi-asserted-by":"crossref","unstructured":"A.A. Efros, T.K. Leung, Texture synthesis by non-parametric sampling, in: ICCV, vol. 2, 1999, pp. 1033\u20131038.","DOI":"10.1109\/ICCV.1999.790383"},{"key":"10.1016\/j.neucom.2020.08.014_b0200","series-title":"CVPR","first-page":"3146","article-title":"Dual attention network for scene segmentation","author":"Fu","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0205","unstructured":"Y. Yuan, J. Wang, Ocnet: Object context network for scene parsing, arXiv preprint arXiv:1809.00916, 2018."},{"key":"10.1016\/j.neucom.2020.08.014_b0210","series-title":"NeurIPS","first-page":"577","article-title":"Attention-based models for speech recognition","author":"Chorowski","year":"2015"},{"key":"10.1016\/j.neucom.2020.08.014_b0215","doi-asserted-by":"crossref","first-page":"182","DOI":"10.1016\/j.neucom.2019.12.042","article-title":"Dynamic attention network for semantic segmentation","volume":"384","author":"Wu","year":"2020","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0220","unstructured":"K. Xu, J. Ba, R. Kiros, K. Cho, A. Courville, R. Salakhudinov, R. Zemel, Y. Bengio, Show, attend and tell: Neural image caption generation with visual attention, in: ICML, 2015, pp. 2048\u20132057."},{"key":"10.1016\/j.neucom.2020.08.014_b0225","series-title":"ECCV","first-page":"451","article-title":"Ask, attend and answer: exploring question-guided spatial attention for visual question answering","author":"Xu","year":"2016"},{"key":"10.1016\/j.neucom.2020.08.014_b0230","series-title":"CVPR","first-page":"3156","article-title":"Residual attention network for image classification","author":"Wang","year":"2017"},{"key":"10.1016\/j.neucom.2020.08.014_b0235","series-title":"CVPR","first-page":"7132","article-title":"Squeeze-and-excitation networks","author":"Hu","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0240","article-title":"Traffic scene semantic segmentation using self-attention mechanism and bi-directional gru to correlate context","author":"Yan","year":"2019","journal-title":"Neurocomputing"},{"key":"10.1016\/j.neucom.2020.08.014_b0245","series-title":"CVPR","first-page":"3640","article-title":"Attention to scale: Scale-aware semantic image segmentation","author":"Chen","year":"2016"},{"key":"10.1016\/j.neucom.2020.08.014_b0250","series-title":"CVPR","first-page":"956","article-title":"Recurrent scene parsing with perspective understanding in the loop","author":"Kong","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0255","series-title":"WACV","first-page":"1024","article-title":"Pixel-wise attentional gating for scene parsing","author":"Kong","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0260","doi-asserted-by":"crossref","unstructured":"Y. Pang, Y. Li, J. Shen, L. Shao, Towards bridging semantic gap to improve semantic segmentation, in: ICCV, 2019, pp. 4230\u20134239.","DOI":"10.1109\/ICCV.2019.00433"},{"key":"10.1016\/j.neucom.2020.08.014_b0265","unstructured":"H. Li, P. Xiong, J. An, L. Wang, Pyramid attention network for semantic segmentation, BMVC, 2018."},{"key":"10.1016\/j.neucom.2020.08.014_b0270","doi-asserted-by":"crossref","unstructured":"Z. Zhu, M. Xu, S. Bai, T. Huang, X. Bai, Asymmetric non-local neural networks for semantic segmentation, in: ICCV, 2019, pp. 593\u2013602.","DOI":"10.1109\/ICCV.2019.00068"},{"key":"10.1016\/j.neucom.2020.08.014_b0275","doi-asserted-by":"crossref","unstructured":"Z. Huang, X. Wang, L. Huang, C. Huang, Y. Wei, W. Liu, Ccnet: Criss-cross attention for semantic segmentation, in: ICCV, 2019, pp. 603\u2013612.","DOI":"10.1109\/ICCV.2019.00069"},{"key":"10.1016\/j.neucom.2020.08.014_b0280","series-title":"CVPR","first-page":"4046","article-title":"Context-reinforced semantic segmentation","author":"Zhou","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0285","series-title":"CVPR","first-page":"7392","article-title":"Triply supervised decoder networks for joint detection and segmentation","author":"Cao","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0290","doi-asserted-by":"crossref","unstructured":"J. Dai, H. Qi, Y. Xiong, Y. Li, G. Zhang, H. Hu, Y. Wei, Deformable convolutional networks, in: ICCV, 2017, pp. 764\u2013773.","DOI":"10.1109\/ICCV.2017.89"},{"key":"10.1016\/j.neucom.2020.08.014_b0295","series-title":"NeurIPS","first-page":"667","article-title":"Dynamic filter networks","author":"Jia","year":"2016"},{"key":"10.1016\/j.neucom.2020.08.014_b0300","series-title":"CVPR","first-page":"11166","article-title":"Pixel-adaptive convolutional neural networks","author":"Su","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0305","article-title":"A new semantic segmentation model for supplementing more spatial information","author":"Han","year":"2019","journal-title":"IEEE Access"},{"key":"10.1016\/j.neucom.2020.08.014_b0310","series-title":"CVPR","first-page":"5639","article-title":"In-place activated batchnorm for memory-optimized training of dnns","author":"Rota Bul\u00f2","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0315","unstructured":"Z. Wu, C. Shen, A. v. d. Hengel, High-performance semantic segmentation using very deep fully convolutional networks, arXiv preprint arXiv:1604.04339, 2016."},{"key":"10.1016\/j.neucom.2020.08.014_b0320","series-title":"Devil in the details: towards accurate single and multiple human parsing","author":"Liu","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0325","series-title":"CVPR","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2016"},{"key":"10.1016\/j.neucom.2020.08.014_b0330","series-title":"CVPR","first-page":"4151","article-title":"Full-resolution residual networks for semantic segmentation in street scenes","author":"Pohlen","year":"2017"},{"key":"10.1016\/j.neucom.2020.08.014_b0335","series-title":"WACV","first-page":"1451","article-title":"Understanding convolution for semantic segmentation","author":"Wang","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0340","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1016\/j.patcog.2019.01.006","article-title":"Wider or deeper: revisiting the resnet model for visual recognition","volume":"90","author":"Wu","year":"2019","journal-title":"Pattern Recognition"},{"key":"10.1016\/j.neucom.2020.08.014_b0345","series-title":"CVPR","first-page":"752","article-title":"Dynamic-structured semantic propagation network","author":"Liang","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0350","series-title":"ECCV","first-page":"325","article-title":"Bisenet: Bilateral segmentation network for real-time semantic segmentation","author":"Yu","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0355","series-title":"CVPR","first-page":"1857","article-title":"Learning a discriminative feature network for semantic segmentation","author":"Yu","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0360","series-title":"ECCV","first-page":"267","article-title":"Psanet: Point-wise spatial attention network for scene parsing","author":"Zhao","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0365","doi-asserted-by":"crossref","unstructured":"F. Zhang, Y. Chen, Z. Li, Z. Hong, J. Liu, F. Ma, J. Han, E. Ding, Acfnet: Attentional class feature network for semantic segmentation, in: ICCV, 2019, pp. 6798\u20136807.","DOI":"10.1109\/ICCV.2019.00690"},{"key":"10.1016\/j.neucom.2020.08.014_b0370","unstructured":"L. Zhang, X. Li, A. Arnab, K. Yang, Y. Tong, P.H. Torr, Dual graph convolutional network for semantic segmentation, BMVC, 2019."},{"key":"10.1016\/j.neucom.2020.08.014_b0375","series-title":"CVPR","first-page":"4700","article-title":"Densely connected convolutional networks","author":"Huang","year":"2017"},{"key":"10.1016\/j.neucom.2020.08.014_b0380","series-title":"ECCV","first-page":"418","article-title":"Unified perceptual parsing for scene understanding","author":"Xiao","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0385","series-title":"CVPR","first-page":"548","article-title":"Co-occurrent features in semantic segmentation","author":"Zhang","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0390","series-title":"CVPR","first-page":"7519","article-title":"Adaptive pyramid context network for semantic segmentation","author":"He","year":"2019"},{"key":"10.1016\/j.neucom.2020.08.014_b0395","doi-asserted-by":"crossref","unstructured":"W. Wang, Z. Zhang, S. Qi, J. Shen, Y. Pang, L. Shao, Learning compositional neural information fusion for human parsing, in: ICCV, 2019, pp. 5703\u20135713.","DOI":"10.1109\/ICCV.2019.00580"},{"key":"10.1016\/j.neucom.2020.08.014_b0400","series-title":"CVPR","first-page":"8929","article-title":"Hierarchical human parsing with typed part-relation reasoning","author":"Wang","year":"2020"},{"key":"10.1016\/j.neucom.2020.08.014_b0405","article-title":"Look into person: Joint body parsing & pose estimation network and a new benchmark","author":"Liang","year":"2018","journal-title":"IEEE TPAMI"},{"key":"10.1016\/j.neucom.2020.08.014_b0410","series-title":"CVPR Workshops","first-page":"7","article-title":"Self-supervised neural aggregation networks for human parsing","author":"Zhao","year":"2017"},{"key":"10.1016\/j.neucom.2020.08.014_b0415","series-title":"ECCV","first-page":"418","article-title":"Macro-micro adversarial network for human parsing","author":"Luo","year":"2018"},{"key":"10.1016\/j.neucom.2020.08.014_b0420","series-title":"ECCV","first-page":"502","article-title":"Mutual learning to adapt for joint human parsing and pose estimation","author":"Nie","year":"2018"}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231220312741?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231220312741?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2020,11,12]],"date-time":"2020-11-12T04:21:05Z","timestamp":1605154865000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231220312741"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1]]},"references-count":84,"alternative-id":["S0925231220312741"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2020.08.014","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2021,1]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"CASINet: Content-Adaptive Scale Interaction Networks for scene parsing","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2020.08.014","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2020 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}]}}