{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T05:44:47Z","timestamp":1736315087182,"version":"3.32.0"},"reference-count":25,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100003593","name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003593","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006162","name":"Funda\u00e7\u00e3o de Amparo \u00e0 Ci\u00eancia e Tecnologia do Estado de 
Pernambuco","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100006162","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100007065","name":"Nvidia","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100007065","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Applied Soft Computing"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1016\/j.asoc.2024.112264","type":"journal-article","created":{"date-parts":[[2024,9,21]],"date-time":"2024-09-21T06:30:12Z","timestamp":1726900212000},"page":"112264","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Sigmoidal learning rate optimizer for deep neural network training using a two-phase adaptation approach"],"prefix":"10.1016","volume":"167","author":[{"given":"David","family":"Mac\u00eado","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6421-9747","authenticated-orcid":false,"given":"Cleber","family":"Zanchettin","sequence":"additional","affiliation":[]},{"given":"Teresa","family":"Ludermir","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.asoc.2024.112264_b1","first-page":"2933","article-title":"Identifying and attacking the saddle point problem in high-dimensional non-convex optimization","author":"Dauphin","year":"2014","journal-title":"Neural Inf. Process. Syst."},{"key":"10.1016\/j.asoc.2024.112264_b2","first-page":"8252","article-title":"Fixing the train-test resolution discrepancy","author":"Touvron","year":"2019","journal-title":"Neural Inf. Process. Syst."},{"key":"10.1016\/j.asoc.2024.112264_b3","doi-asserted-by":"crossref","unstructured":"D.S. Park, Y. Zhang, Y. Jia, W. Han, C. Chiu, B. Li, Y. Wu, Q.V. Le, Improved Noisy Student Training for Automatic Speech Recognition, in: Annual Conference of the Int. 
Speech Communication Association, 2020, pp. 2817\u20132821.","DOI":"10.21437\/Interspeech.2020-1470"},{"key":"10.1016\/j.asoc.2024.112264_b4","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019","journal-title":"Assoc. Comput. Linguist.: Hum. Lang. Technol."},{"issue":"3","key":"10.1016\/j.asoc.2024.112264_b5","doi-asserted-by":"crossref","first-page":"292","DOI":"10.3390\/electronics8030292","article-title":"A state-of-the-art survey on deep learning theory and architectures","volume":"8","author":"Alom","year":"2019","journal-title":"Electronics"},{"year":"2020","series-title":"Descending through a crowded valley - benchmarking deep learning optimizers","author":"Schmidt","key":"10.1016\/j.asoc.2024.112264_b6"},{"key":"10.1016\/j.asoc.2024.112264_b7","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"Duchi","year":"2011","journal-title":"J. M. L. Res."},{"key":"10.1016\/j.asoc.2024.112264_b8","series-title":"Proceedings of the 38th International Conference on Machine Learning","first-page":"9367","article-title":"Descending through a crowded valley - benchmarking deep learning optimizers","volume":"vol. 139","author":"Schmidt","year":"2021"},{"key":"10.1016\/j.asoc.2024.112264_b9","unstructured":"D.P. Kingma, J. Ba, Adam: A Method for Stochastic Optimization, in: International Conference on Learning Representations, 2015."},{"key":"10.1016\/j.asoc.2024.112264_b10","article-title":"RMSProp: Divide the gradient by a running average of its recent magnitude","author":"Tieleman","year":"2012","journal-title":"Neural Netw. Mach. 
Learn."},{"key":"10.1016\/j.asoc.2024.112264_b11","first-page":"4148","article-title":"The marginal value of adaptive gradient methods in machine learning","author":"Wilson","year":"2017","journal-title":"NeurIPS"},{"key":"10.1016\/j.asoc.2024.112264_b12","series-title":"Brazilian Conference on Intelligent Systems","first-page":"123","article-title":"Training aware sigmoidal optimization","author":"Mac\u00eado","year":"2022"},{"issue":"11","key":"10.1016\/j.asoc.2024.112264_b13","doi-asserted-by":"crossref","first-page":"2278","DOI":"10.1109\/5.726791","article-title":"Gradient-based learning applied to document recognition","volume":"86","author":"Lecun","year":"1998","journal-title":"Proc. IEEE"},{"key":"10.1016\/j.asoc.2024.112264_b14","unstructured":"K. Simonyan, A. Zisserman, Very Deep Convolutional Networks for Large-Scale Image Recognition, in: In. Conf. on Learning Representations, 2015."},{"key":"10.1016\/j.asoc.2024.112264_b15","doi-asserted-by":"crossref","unstructured":"K. He, X. Zhang, S. Ren, J. Sun, Deep Residual Learning for Image Recognition, in: IEEE Conf. on Comp. Vision and Pattern Recognition, 2016, pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"10.1016\/j.asoc.2024.112264_b16","doi-asserted-by":"crossref","unstructured":"G. Huang, Z. Liu, L. Van Der Maaten, K.Q. Weinberger, Densely connected convolutional networks, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 4700\u20134708.","DOI":"10.1109\/CVPR.2017.243"},{"year":"2009","series-title":"Learning Multiple Layers of Features from Tiny Images","author":"Krizhevsky","key":"10.1016\/j.asoc.2024.112264_b17"},{"key":"10.1016\/j.asoc.2024.112264_b18","doi-asserted-by":"crossref","unstructured":"J. Zhou, W. 
Xu, End-to-end learning of semantic role labeling using recurrent neural networks, in: Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), 2015, pp. 1127\u20131137.","DOI":"10.3115\/v1\/P15-1109"},{"key":"10.1016\/j.asoc.2024.112264_b19","series-title":"ACL","first-page":"1095","article-title":"Selective encoding for abstractive sentence summarization","author":"Zhou","year":"2017"},{"key":"10.1016\/j.asoc.2024.112264_b20","article-title":"Character-level convolutional networks for text classification","volume":"28","author":"Zhang","year":"2015","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"5","key":"10.1016\/j.asoc.2024.112264_b21","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/0041-5553(64)90137-5","article-title":"Some methods of speeding up the convergence of iteration methods","volume":"4","author":"Polyak","year":"1964","journal-title":"USSR Comp. Math. Math. Phys."},{"year":"2013","series-title":"Generating sequences with recurrent neural networks","author":"Graves","key":"10.1016\/j.asoc.2024.112264_b22"},{"key":"10.1016\/j.asoc.2024.112264_b23","unstructured":"S.J. Reddi, S. Kale, S. Kumar, On the Convergence of Adam and Beyond, in: International Conference on Learning Representations, 2018."},{"key":"10.1016\/j.asoc.2024.112264_b24","doi-asserted-by":"crossref","unstructured":"N. Tishby, N. Zaslavsky, Deep learning and the information bottleneck principle, in: IEEE Information Theory Workshop, 2015, pp. 
1\u20135.","DOI":"10.1109\/ITW.2015.7133169"},{"year":"2009","series-title":"Learning Multiple Layers of Features from Tiny Images","author":"Krizhevsky","key":"10.1016\/j.asoc.2024.112264_b25"}],"container-title":["Applied Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156849462401038X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156849462401038X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T02:38:21Z","timestamp":1736303901000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S156849462401038X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":25,"alternative-id":["S156849462401038X"],"URL":"https:\/\/doi.org\/10.1016\/j.asoc.2024.112264","relation":{},"ISSN":["1568-4946"],"issn-type":[{"type":"print","value":"1568-4946"}],"subject":[],"published":{"date-parts":[[2024,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Sigmoidal learning rate optimizer for deep neural network training using a two-phase adaptation approach","name":"articletitle","label":"Article Title"},{"value":"Applied Soft Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.asoc.2024.112264","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"112264"}}