{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:20:14Z","timestamp":1740122414399,"version":"3.37.3"},"reference-count":18,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2020,7,11]],"date-time":"2020-07-11T00:00:00Z","timestamp":1594425600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,7,11]],"date-time":"2020-07-11T00:00:00Z","timestamp":1594425600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100011512","name":"National Research Foundation","doi-asserted-by":"publisher","award":["2019R1H1A2039658"],"id":[{"id":"10.13039\/100011512","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["2015M3C4A706564"],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]},{"name":"MOTIE\/KEIT","award":["0076476"]},{"DOI":"10.13039\/501100013173","name":"Gyeonggi-do Regional Research Center","doi-asserted-by":"publisher","award":["GRRC-KAU-2019-B01"],"id":[{"id":"10.13039\/501100013173","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Information Technology Research Center","award":["IITP-2020-2018-0-01423"]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Cluster Comput"],"published-print":{"date-parts":[[2020,9]]},"DOI":"10.1007\/s10586-020-03144-9","type":"journal-article","created":{"date-parts":[[2020,7,11]],"date-time":"2020-07-11T09:02:50Z","timestamp":1594458170000},"page":"2287-2300","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Towards an optimized distributed deep learning framework for a heterogeneous multi-GPU cluster"],"prefix":"10.1007","volume":"23","author":[{"given":"Youngrang","family":"Kim","sequence":"first","affiliation":[]},{"given":"Hyeonseong","family":"Choi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6248-9567","authenticated-orcid":false,"given":"Jaehwan","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Jik-Soo","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Hyunseung","family":"Jei","sequence":"additional","affiliation":[]},{"given":"Hongchan","family":"Roh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,7,11]]},"reference":[{"key":"3144_CR1","unstructured":"Zinkevich, M., Weimer, M., Li, L., Smola, A.J.: Parallelized stochastic gradient descent. In: Advances in Neural Information Processing Systems 23"},{"key":"3144_CR2","doi-asserted-by":"crossref","unstructured":"Heigold, G., McDermott, E., Vanhoucke, V., Senior, A., Bacchiani, M.: Asynchronous stochastic optimization for sequence training of deep neural networks. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","DOI":"10.1109\/ICASSP.2014.6854672"},{"key":"3144_CR3","unstructured":"Sergeev, A., Balso, M.D..: Horovod: fast and easy distributed deep learning in tensorflow. In: arxiv.org, Feb 2018"},{"key":"3144_CR4","unstructured":"Ho, Q., Cipar, J., Cui, H., Lee, S., Kim, J.K., Gibbons, P.B., Gibson, G.A., Ganger, G., Xing, E.P.: More effective distributed ml via a stale synchronous parallel parameter server. In: Advances in Neural Information Processing Systems 26 (NIPS 2013)"},{"key":"3144_CR5","unstructured":"TensorFlow: an open source machine learning library for research and production. https:\/\/www.tensorflow.org\/"},{"key":"3144_CR6","unstructured":"Goyal, P., Doll\u00e1r, P., Girshick, R., Noordhuis, P., Wesolowski, L., Kyrola, A., Tulloch, A., Jia, Y., He, K.: Accurate, large minibatch sgd: Training imagenet in 1 hour. In: arxiv.org, April 2018"},{"key":"3144_CR7","unstructured":"MPICH: high-performance portable MPI, https:\/\/www.mpich.org\/"},{"key":"3144_CR8","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: arxiv.org, 2015","DOI":"10.1109\/CVPR.2016.90"},{"key":"3144_CR9","unstructured":"ImageNet, https:\/\/image-net.org, 2017"},{"key":"3144_CR10","unstructured":"Distributed TensorFlow, https:\/\/www.tensorflow.org\/deploy\/distributed\/"},{"key":"3144_CR11","unstructured":"asyncio\u2014Asynchronous I\/O, http:\/\/docs.python.org\/3\/library\/asyncio.html"},{"key":"3144_CR12","unstructured":"py_func, https:\/\/www.tensorflow.org\/ api_docs\/python\/tf\/py_func"},{"key":"3144_CR13","doi-asserted-by":"crossref","unstructured":"Mathuriya, A., Bard, A., Mendygral, P., Meadows, L., Arnemann, J., Shao, L., He, S., Karna, t., Moise, D., Pennycook, S.J., Maschoff, K., Sewall, J., Kumar, N., Ho, S., Ringenburg, M., Prabhat, Lee, V.: Cosmoflow: using deep learning to learn the universe at scale. In: arxiv.org, Aug 2018","DOI":"10.1109\/SC.2018.00068"},{"key":"3144_CR14","doi-asserted-by":"crossref","unstructured":"Kim, S., Yu, G.-I., Park, H., Cho, S., Jeong, E., Ha, H., Lee, S., Jeong, J.S., Chun, B.-G. Parallax: sparsity-aware data parallel training of deep neural networks. In: EuroSys 2019, March 2019","DOI":"10.1145\/3302424.3303957"},{"key":"3144_CR15","unstructured":"Lian, X., Zhang, W., Zhang, C., Liu, J.: Asynchronous decentralized parallel stochastic gradient descent. In: Dy, J.G., Krause, A., Eds., Proceedings of the 35th International Conference on Machine Learning, ICML 2018, Stockholmsm\u00e4ssan, Stockholm, Sweden, July 10\u201315, 2018, series Proceedings of Machine Learning Research, vol.\u00a080. PMLR, 2018, pp. 3049\u20133058. http:\/\/proceedings.mlr.press\/v80\/lian18a.html"},{"key":"3144_CR16","doi-asserted-by":"crossref","unstructured":"Luo, Q., Lin, J., Zhuo, Y., Qian, X.: Hop: Heterogeneity-aware decentralized training. In: Proceedings of the Twenty-Fourth International Conference on Architectural Support for Programming Languages and Operating Systems, 2019, pp. 893\u2013907","DOI":"10.1145\/3297858.3304009"},{"key":"3144_CR17","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1016\/j.parco.2019.03.005","volume":"85","author":"AA Awan","year":"2019","unstructured":"Awan, A.A., Manian, K.V., Chu, C.-H., Subramoni, H., Panda, D.K.: Optimized large-message broadcast for deep learning workloads: MPI, MPI+NCCL, or NCCL2? Parallel Comput. 85, 141\u2013152 (2019)","journal-title":"Parallel Comput."},{"key":"3144_CR18","unstructured":"SONY Breaks ResNet-50 Training Record with NVIDIA V100 Tensor Core GPUs. http:\/\/news.developer.nvidia.com\/sony-breaks-resnet-50-training-record-with-nvidia-v100-tensor-core-gpus\/"}],"container-title":["Cluster Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-020-03144-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10586-020-03144-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10586-020-03144-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,7,10]],"date-time":"2021-07-10T23:16:07Z","timestamp":1625958967000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10586-020-03144-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7,11]]},"references-count":18,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2020,9]]}},"alternative-id":["3144"],"URL":"https:\/\/doi.org\/10.1007\/s10586-020-03144-9","relation":{},"ISSN":["1386-7857","1573-7543"],"issn-type":[{"type":"print","value":"1386-7857"},{"type":"electronic","value":"1573-7543"}],"subject":[],"published":{"date-parts":[[2020,7,11]]},"assertion":[{"value":"29 November 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 May 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 June 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 July 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}