{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2023,3,6]],"date-time":"2023-03-06T08:13:57Z","timestamp":1678090437624},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Defense Challengeable Future Technology Program of the Agency for Defense Development, Republic of Korea"},{"DOI":"10.13039\/501100003696","name":"Electronics and Telecommunications Research Institute (ETRI) grant funded by the Korean government","doi-asserted-by":"publisher","award":["20ZS1100, Core Technology Research for Self-Improving Integrated Artificial Intelligence System"],"id":[{"id":"10.13039\/501100003696","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/access.2021.3070012","type":"journal-article","created":{"date-parts":[[2021,3,31]],"date-time":"2021-03-31T19:45:02Z","timestamp":1617219902000},"page":"52601-52618","source":"Crossref","is-referenced-by-count":2,"title":["Accelerating Distributed SGD With Group Hybrid Parallelism"],"prefix":"10.1109","volume":"9","author":[{"ORCID":"http:\/\/orcid.org\/0000-0001-7339-1645","authenticated-orcid":false,"given":"Kyung-No","family":"Joo","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-3970-7308","authenticated-orcid":false,"given":"Chan-Hyun","family":"Youn","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623612"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783270"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2017.2752706"},{"key":"ref32","article-title":"Deep gradient compression: Reducing the communication bandwidth for distributed training","author":"lin","year":"2018","journal-title":"Proc Int Conf Learn Represent (ICLR)"},{"key":"ref31","first-page":"1709","article-title":"QSGD: Communication-efficient SGD via gradient quantization and encoding","author":"alistarh","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref30","article-title":"How to scale distributed deep learning?","author":"jin","year":"2016","journal-title":"Proc ML Syst Workshop NIPS"},{"key":"ref37","article-title":"Stanza: Layer separation for distributed training in deep learning","author":"wu","year":"2018","journal-title":"arXiv 1812 10624"},{"key":"ref36","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"arXiv 1707 06347"},{"key":"ref35","article-title":"Exploring hidden dimensions in parallelizing convolutional neural networks","author":"jia","year":"2018","journal-title":"arXiv 1802 04924"},{"key":"ref34","article-title":"GPipe: Efficient training of giant neural networks using pipeline parallelism","author":"huang","year":"2019","journal-title":"Proc Neural Inf Process Syst (NeurIPS)"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/MLHPC.2016.006"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-8191(06)80021-9"},{"key":"ref11","first-page":"1","article-title":"Paleo: A performance model for deep neural networks","author":"qi","year":"2017","journal-title":"Proc ICLR"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/PADSW.2018.8644932"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CCGRID.2019.00068"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1177\/1094342005051521"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"ref16","article-title":"Device placement optimization with reinforcement learning","author":"mirhoseini","year":"2017","journal-title":"arXiv 1706 04972"},{"key":"ref17","first-page":"1","article-title":"Placeto: Learning generalizable device placement algorithms for distributed machine learning","author":"addanki","year":"2019","journal-title":"Proc NIPS"},{"key":"ref18","first-page":"6105","article-title":"EfficientNet: Rethinking model scaling for convolutional neural networks","author":"tan","year":"2019","journal-title":"Proc ICML"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2019.2913833"},{"key":"ref28","article-title":"Solving the straggler problem with bounded staleness","author":"cipar","year":"2013","journal-title":"Proc 14th Workshop Hot Topics Oper Syst (HotOS)"},{"key":"ref4","article-title":"Scalable deep learning on distributed infrastructures: Challenges, techniques and tools","author":"mayer","year":"2019","journal-title":"arXiv 1903 11314"},{"key":"ref27","article-title":"Revisiting distributed synchronous SGD","author":"chen","year":"2016","journal-title":"arXiv 1604 00981"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3320060"},{"key":"ref6","first-page":"2545","article-title":"Trading redundancy for communication: Speeding up distributed SGD for non-convex optimization","author":"haddadpour","year":"2019","journal-title":"Proc 36th Int Conf Mach Learn (PMLR)"},{"key":"ref29","first-page":"803","article-title":"Slow and stale gradients can win the race: Error-runtime trade-offs in distributed SGD","author":"dutta","year":"2018","journal-title":"Proc Int Conf Artif Intell Statist"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783721"},{"key":"ref8","first-page":"1","article-title":"Beyond data and model parallelism for deep neural networks","author":"jia","year":"2019","journal-title":"Proc 2nd SysML Conf"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2018.2877359"},{"key":"ref2","article-title":"Distributed deep learning using synchronous stochastic gradient descent","author":"das","year":"2016","journal-title":"arXiv 1602 06709"},{"key":"ref9","article-title":"Integrated model, batch and domain parallelism in training neural networks","author":"gholami","year":"2017","journal-title":"arXiv 1712 04432"},{"key":"ref1","first-page":"239","article-title":"The turing O-machine and the DIME network architecture: Injecting the architectural resiliency into distributed computing","volume":"10","author":"mikkilineni","year":"2012","journal-title":"Turing-100"},{"key":"ref20","article-title":"One weird trick for parallelizing convolutional neural networks","author":"krizhevsky","year":"2014","journal-title":"arXiv 1404 5997"},{"key":"ref22","article-title":"Federated learning: Strategies for improving communication efficiency","author":"konecn\u00fd","year":"2016","journal-title":"arXiv 1610 05492"},{"key":"ref21","article-title":"Accurate, large minibatch SGD: Training ImageNet in 1 hour","author":"goyal","year":"2017","journal-title":"arXiv 1706 02677"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/321978.321985"},{"key":"ref24","article-title":"Towards federated learning at scale: System design","author":"bonawitz","year":"2019","journal-title":"arXiv 1902 01046"},{"key":"ref41","first-page":"1","article-title":"Optimization of Collective Reduction Operations","author":"rabenseifner","year":"2004","journal-title":"Proc Int Conf Comput Sci"},{"key":"ref23","article-title":"Communication-efficient learning of deep networks from decentralized data","author":"mcmahan","year":"2016","journal-title":"arXiv 1602 05629"},{"key":"ref26","first-page":"1223","article-title":"Large scale distributed deep networks","author":"dean","year":"2012","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/42411.42415"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639343"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9312710\/09391652.pdf?arnumber=9391652","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T18:39:53Z","timestamp":1643222393000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9391652\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/access.2021.3070012","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}