{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T01:40:18Z","timestamp":1732153218285,"version":"3.28.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3650200.3656636","type":"proceedings-article","created":{"date-parts":[[2024,6,3]],"date-time":"2024-06-03T18:11:54Z","timestamp":1717438314000},"page":"437-448","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["gZCCL: Compression-Accelerated Collective Communication Framework for GPU Clusters"],"prefix":"10.1145","author":[{"ORCID":"http:\/\/orcid.org\/0000-0001-5092-3987","authenticated-orcid":false,"given":"Jiajun","family":"Huang","sequence":"first","affiliation":[{"name":"University of California, Riverside, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-9935-5674","authenticated-orcid":false,"given":"Sheng","family":"Di","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-6244-1264","authenticated-orcid":false,"given":"Xiaodong","family":"Yu","sequence":"additional","affiliation":[{"name":"Stevens Institute of Technology, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-2688-8058","authenticated-orcid":false,"given":"Yujia","family":"Zhai","sequence":"additional","affiliation":[{"name":"University of California, Riverside, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-0177-502X","authenticated-orcid":false,"given":"Jinyang","family":"Liu","sequence":"additional","affiliation":[{"name":"University of California, Riverside, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-7370-6766","authenticated-orcid":false,"given":"Yafan","family":"Huang","sequence":"additional","affiliation":[{"name":"University of Iowa, USA"}]},{"ORCID":"http:\/\/orcid.org\/0009-0003-4705-2713","authenticated-orcid":false,"given":"Ken","family":"Raffenetti","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-4422-2911","authenticated-orcid":false,"given":"Hui","family":"Zhou","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-5328-3962","authenticated-orcid":false,"given":"Kai","family":"Zhao","sequence":"additional","affiliation":[{"name":"Florida State University, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0001-7581-8905","authenticated-orcid":false,"given":"Xiaoyi","family":"Lu","sequence":"additional","affiliation":[{"name":"University of California, Merced, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0003-2578-4940","authenticated-orcid":false,"given":"Zizhong","family":"Chen","sequence":"additional","affiliation":[{"name":"University of California, Riverside, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-7890-3934","authenticated-orcid":false,"given":"Franck","family":"Cappello","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-3731-5423","authenticated-orcid":false,"given":"Yanfei","family":"Guo","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, USA"}]},{"ORCID":"http:\/\/orcid.org\/0000-0002-5532-3048","authenticated-orcid":false,"given":"Rajeev","family":"Thakur","sequence":"additional","affiliation":[{"name":"Argonne National Laboratory, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,3]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Tensorflow: A system for large-scale machine learning. In 12th { USENIX} symposium on operating systems design and implementation ({ OSDI} 16). 265\u2013283.","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, 2016. Tensorflow: A system for large-scale machine learning. In 12th { USENIX} symposium on operating systems design and implementation ({ OSDI} 16). 265\u2013283."},{"key":"e_1_3_2_1_2_1","unstructured":"Ahmed\u00a0M. Abdelmoniem Ahmed Elzanaty Mohamed-Slim Alouini and Marco Canini. 2021. An Efficient Statistical-based Gradient Compression Technique for Distributed Training Systems. arxiv:2101.10761\u00a0[cs.LG]"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1088149.1088183"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018743.3018769"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ExaMPI49596.2019.00007"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2018.00009"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00033"},{"key":"e_1_3_2_1_8_1","unstructured":"NVIDIA Corp.2023. NCCL \u2013 Optimized primitives for inter-GPU communication.https:\/\/github.com\/NVIDIA\/nccl."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2016.11"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPDC52870.2021.9521599"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Jiajun Huang Sheng Di Xiaodong Yu Yujia Zhai Zhaorui Zhang Jinyang Liu Xiaoyi Lu Ken Raffenetti Hui Zhou Kai Zhao Zizhong Chen Franck Cappello Yanfei Guo and Rajeev Thakur. 2023. An Optimized Error-controlled MPI Collective Framework Integrated with Lossy Compression. arxiv:2304.03890\u00a0[cs.DC]","DOI":"10.1109\/IPDPS57955.2024.00072"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData59044.2023.10386386"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607048"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/DLS49591.2019.00015"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.3997\/1365-2397.fb2020015"},{"key":"e_1_3_2_1_17_1","unstructured":"Argonne\u00a0National Laboratory. 2023. MPICH \u2013 A high-performance and widely portable implementation of the MPI-4.0 standard.https:\/\/www.mpich.org."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2014.2346458"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3639259"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jpdc.2008.09.002"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1166\/asem.2015.1673"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/sc41405.2020.00039"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3380595"},{"key":"e_1_3_2_1_24_1","volume-title":"A survey of forecast error measures. World applied sciences journal 24, 24","author":"Shcherbakov Maxim\u00a0Vladimirovich","year":"2013","unstructured":"Maxim\u00a0Vladimirovich Shcherbakov, Adriaan Brebels, Nataliya\u00a0Lvovna Shcherbakova, Anton\u00a0Pavlovich Tyukov, Timur\u00a0Alexandrovich Janovsky, Valeriy\u00a0Anatol\u2019evich Kamaev, 2013. A survey of forecast error measures. World applied sciences journal 24, 24 (2013), 171\u2013176."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","unstructured":"Dingwen Tao Sheng Di and Franck Cappello. 2017. Significantly Improving Lossy Compression for Scientific Data Sets Based on Multidimensional Prediction and Error-Controlled Quantization. https:\/\/doi.org\/10.1109\/IPDPS.2017.115","DOI":"10.1109\/IPDPS.2017.115"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342005051521"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3369583.3392688"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC56025.2022.00016"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00023"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS49936.2021.00053"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-07312-0_1"}],"event":{"name":"ICS '24: 2024 International Conference on Supercomputing","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Kyoto Japan","acronym":"ICS '24"},"container-title":["Proceedings of the 38th ACM International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3650200.3656636","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,21]],"date-time":"2024-11-21T01:03:38Z","timestamp":1732151018000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3650200.3656636"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":31,"alternative-id":["10.1145\/3650200.3656636","10.1145\/3650200"],"URL":"https:\/\/doi.org\/10.1145\/3650200.3656636","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-03","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}