{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T04:39:52Z","timestamp":1740112792172,"version":"3.37.3"},"reference-count":47,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100011442","name":"State Key Laboratory of Computer Aided Design and Computer Graphics","doi-asserted-by":"publisher","award":["A2329"],"id":[{"id":"10.13039\/501100011442","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004835","name":"Zhejiang University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004835","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Information Sciences"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1016\/j.ins.2024.121694","type":"journal-article","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T23:50:31Z","timestamp":1732837831000},"page":"121694","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Self-distillation with model averaging"],"prefix":"10.1016","volume":"694","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7948-7672","authenticated-orcid":false,"given":"Xiaozhe","family":"Gu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3141-1278","authenticated-orcid":false,"given":"Zixun","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Ran","family":"Jin","sequence":"additional","affiliation":[]},{"given":"Rick Siow","family":"Mong Goh","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Luo","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.ins.2024.121694_br0010","series-title":"Advances in Neural Information Processing Systems","first-page":"1097","article-title":"Imagenet classification with deep convolutional neural networks","author":"Krizhevsky","year":"2012"},{"author":"Hackel","key":"10.1016\/j.ins.2024.121694_br0020"},{"key":"10.1016\/j.ins.2024.121694_br0030","series-title":"International Conference on Machine Learning","first-page":"647","article-title":"Decaf: a deep convolutional activation feature for generic visual recognition","author":"Donahue","year":"2014"},{"issue":"12","key":"10.1016\/j.ins.2024.121694_br0040","doi-asserted-by":"crossref","first-page":"9904","DOI":"10.1109\/TPAMI.2021.3132068","article-title":"Ctnet: context-based tandem network for semantic segmentation","volume":"44","author":"Li","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"6","key":"10.1016\/j.ins.2024.121694_br0050","doi-asserted-by":"crossref","first-page":"7451","DOI":"10.1007\/s40747-023-01135-y","article-title":"Hysteresis quantified control for switched reaction\u2013diffusion systems and its application","volume":"9","author":"Peng","year":"2023","journal-title":"Complex Intell. Syst."},{"issue":"14","key":"10.1016\/j.ins.2024.121694_br0060","doi-asserted-by":"crossref","DOI":"10.1016\/j.jfranklin.2024.107070","article-title":"Quantized control for interconnected pde systems via mobile measurement and control strategies","volume":"361","author":"Zheng","year":"2024","journal-title":"J. Franklin Inst."},{"author":"Han","key":"10.1016\/j.ins.2024.121694_br0070"},{"key":"10.1016\/j.ins.2024.121694_br0080","series-title":"International Conference on Machine Learning, PMLR","first-page":"1737","article-title":"Deep learning with limited numerical precision","author":"Gupta","year":"2015"},{"author":"Hinton","key":"10.1016\/j.ins.2024.121694_br0090"},{"author":"Furlanello","key":"10.1016\/j.ins.2024.121694_br0100"},{"author":"Zagoruyko","key":"10.1016\/j.ins.2024.121694_br0110"},{"author":"Izmailov","key":"10.1016\/j.ins.2024.121694_br0120"},{"issue":"4","key":"10.1016\/j.ins.2024.121694_br0130","doi-asserted-by":"crossref","first-page":"838","DOI":"10.1137\/0330046","article-title":"Acceleration of stochastic approximation by averaging","volume":"30","author":"Polyak","year":"1992","journal-title":"SIAM J. Control Optim."},{"year":"1988","series-title":"Efficient estimations from a slowly convergent Robbins-Monro process","author":"Ruppert","key":"10.1016\/j.ins.2024.121694_br0140"},{"author":"Loshchilov","key":"10.1016\/j.ins.2024.121694_br0150"},{"key":"10.1016\/j.ins.2024.121694_br0160","first-page":"22405","article-title":"Swad: domain generalization by seeking flat minima","volume":"34","author":"Cha","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"author":"Rame","key":"10.1016\/j.ins.2024.121694_br0170"},{"author":"Mirzadeh","key":"10.1016\/j.ins.2024.121694_br0180"},{"author":"Tian","key":"10.1016\/j.ins.2024.121694_br0190"},{"author":"Cooper","key":"10.1016\/j.ins.2024.121694_br0200"},{"author":"He","key":"10.1016\/j.ins.2024.121694_br0210"},{"author":"Jha","key":"10.1016\/j.ins.2024.121694_br0220"},{"author":"Garipov","key":"10.1016\/j.ins.2024.121694_br0230"},{"key":"10.1016\/j.ins.2024.121694_br0240","series-title":"Ensemble Methods in Machine Learning, in: International Workshop on Multiple Classifier Systems","first-page":"1","author":"Dietterich","year":"2000"},{"author":"Yuan","key":"10.1016\/j.ins.2024.121694_br0250"},{"author":"Zhou","key":"10.1016\/j.ins.2024.121694_br0260"},{"key":"10.1016\/j.ins.2024.121694_br0270","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3967","article-title":"Relational knowledge distillation","author":"Park","year":"2019"},{"key":"10.1016\/j.ins.2024.121694_br0280","first-page":"33716","article-title":"Knowledge distillation from a stronger teacher","volume":"35","author":"Huang","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"author":"Huang","key":"10.1016\/j.ins.2024.121694_br0290"},{"author":"Gu","key":"10.1016\/j.ins.2024.121694_br0300"},{"key":"10.1016\/j.ins.2024.121694_br0310","series-title":"Advances in Neural Information Processing Systems","first-page":"13153","article-title":"A simple baseline for Bayesian uncertainty in deep learning","author":"Maddox","year":"2019"},{"author":"Yang","key":"10.1016\/j.ins.2024.121694_br0320"},{"author":"Gupta","key":"10.1016\/j.ins.2024.121694_br0330"},{"key":"10.1016\/j.ins.2024.121694_br0340","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"441","article-title":"Few-shot image recognition with knowledge transfer","author":"Peng","year":"2019"},{"key":"10.1016\/j.ins.2024.121694_br0350","series-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"1339","article-title":"Unsupervised multi-target domain adaptation through knowledge distillation","author":"Nguyen-Meidine","year":"2021"},{"author":"M\u00fcller","key":"10.1016\/j.ins.2024.121694_br0360"},{"author":"Hahn","key":"10.1016\/j.ins.2024.121694_br0370"},{"author":"Clark","key":"10.1016\/j.ins.2024.121694_br0380"},{"key":"10.1016\/j.ins.2024.121694_br0390","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"3713","article-title":"Be your own teacher: improve the performance of convolutional neural networks via self distillation","author":"Zhang","year":"2019"},{"key":"10.1016\/j.ins.2024.121694_br0400","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"13876","article-title":"Regularizing class-wise predictions via self-knowledge distillation","author":"Yun","year":"2020"},{"year":"2009","series-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","key":"10.1016\/j.ins.2024.121694_br0410"},{"year":"2007","series-title":"Caltech-256 object category dataset","author":"Griffin","key":"10.1016\/j.ins.2024.121694_br0420"},{"year":"2011","series-title":"The caltech-ucsd birds-200-2011 dataset","author":"Wah","key":"10.1016\/j.ins.2024.121694_br0430"},{"issue":"5","key":"10.1016\/j.ins.2024.121694_br0440","first-page":"8","article-title":"Tiny imagenet classification with convolutional neural networks","volume":"2","author":"Yao","year":"2015","journal-title":"CS 231N"},{"key":"10.1016\/j.ins.2024.121694_br0450","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"4320","article-title":"Deep mutual learning","author":"Zhang","year":"2018"},{"issue":"4","key":"10.1016\/j.ins.2024.121694_br0460","doi-asserted-by":"crossref","first-page":"2070","DOI":"10.1109\/TCYB.2020.3007506","article-title":"Highlight every step: knowledge distillation via collaborative teaching","volume":"52","author":"Zhao","year":"2020","journal-title":"IEEE Trans. Cybern."},{"author":"Xu","key":"10.1016\/j.ins.2024.121694_br0470"}],"container-title":["Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0020025524016086?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0020025524016086?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,12,25]],"date-time":"2024-12-25T03:26:48Z","timestamp":1735097208000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0020025524016086"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":47,"alternative-id":["S0020025524016086"],"URL":"https:\/\/doi.org\/10.1016\/j.ins.2024.121694","relation":{},"ISSN":["0020-0255"],"issn-type":[{"type":"print","value":"0020-0255"}],"subject":[],"published":{"date-parts":[[2025,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Self-distillation with model averaging","name":"articletitle","label":"Article Title"},{"value":"Information Sciences","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.ins.2024.121694","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier Inc. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"121694"}}