{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,21]],"date-time":"2024-09-21T04:24:09Z","timestamp":1726892649293},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030967710"},{"type":"electronic","value":"9783030967727"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-030-96772-7_1","type":"book-chapter","created":{"date-parts":[[2022,3,15]],"date-time":"2022-03-15T21:13:08Z","timestamp":1647378788000},"page":"3-14","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Accelerating GPU-Based Out-of-Core Stencil Computation with\u00a0On-the-Fly Compression"],"prefix":"10.1007","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-2090-159X","authenticated-orcid":false,"given":"Jingcheng","family":"Shen","sequence":"first","affiliation":[]},{"given":"Yifan","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Masao","family":"Okita","sequence":"additional","affiliation":[]},{"given":"Fumihiko","family":"Ino","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,3,16]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Adams, S., Payne, J., Boppana, R.: Finite difference time domain (FDTD) simulations using graphics processors. In: 2007 DoD High Performance Computing Modernization Program Users Group Conference, pp. 334\u2013338. IEEE (2007)","DOI":"10.1109\/HPCMP-UGC.2007.34"},{"issue":"2","key":"1_CR2","doi-asserted-by":"publisher","first-page":"397","DOI":"10.1177\/1094342018762036","volume":"33","author":"J Calhoun","year":"2019","unstructured":"Calhoun, J., Cappello, F., Olson, L.N., Snir, M., Gropp, W.D.: Exploring the feasibility of lossy compression for PDE simulations. Int. J. High Perf. Comput. Appl. 33(2), 397\u2013410 (2019)","journal-title":"Int. J. High Perf. Comput. Appl."},{"key":"1_CR3","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/978-3-030-63393-6_7","volume-title":"Driving Scientific and Engineering Discoveries Through the Convergence of HPC, Big Data and AI","author":"F Cappello","year":"2020","unstructured":"Cappello, F., Di, S., Gok, A.M.: Fulfilling the promises of lossy compression for scientific applications. In: Nichols, J., Verastegui, B., Maccabe, A.B., Hernandez, O., Parete-Koon, S., Ahearn, T. (eds.) SMC 2020. CCIS, vol. 1315, pp. 99\u2013116. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-63393-6_7"},{"key":"1_CR4","doi-asserted-by":"crossref","unstructured":"Farres, A., Rosas, C., Hanzich, M., Jord\u00e0, M., Pe\u00f1a, A.: Performance evaluation of fully anisotropic elastic wave propagation on NVIDIA volta GPUs. In: 81st EAGE Conference and Exhibition 2019, vol. 2019, pp. 1\u20135. European Association of Geoscientists & Engineers (2019)","DOI":"10.3997\/2214-4609.201901307"},{"issue":"3","key":"1_CR5","doi-asserted-by":"publisher","first-page":"956","DOI":"10.1109\/JBHI.2014.2310745","volume":"18","author":"K Ikeda","year":"2014","unstructured":"Ikeda, K., Ino, F., Hagihara, K.: Efficient acceleration of mutual information computation for nonrigid registration using CUDA. IEEE J. Biomed. Health Inf. 18(3), 956\u2013968 (2014)","journal-title":"IEEE J. Biomed. Health Inf."},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Jin, G., Lin, J., Endo, T.: Efficient utilization of memory hierarchy to enable the computation on bigger domains for stencil computation in CPU-GPU based systems. In: 2014 International Conference on High Performance Computing and Applications (ICHPCA), pp. 1\u20136. IEEE (2014)","DOI":"10.1109\/ICHPCA.2014.7045354"},{"key":"1_CR7","doi-asserted-by":"crossref","unstructured":"Jin, S., et al.: Understanding GPU-based lossy compression for extreme-scale cosmological simulations. In: 2020 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 105\u2013115. IEEE (2020)","DOI":"10.1109\/IPDPS47924.2020.00021"},{"issue":"12","key":"1_CR8","doi-asserted-by":"publisher","first-page":"2674","DOI":"10.1109\/TVCG.2014.2346458","volume":"20","author":"P Lindstrom","year":"2014","unstructured":"Lindstrom, P.: Fixed-rate compressed floating-point arrays. IEEE Trans. Vis. Comput. Graph. 20(12), 2674\u20132683 (2014)","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"issue":"1","key":"1_CR9","first-page":"19","volume":"13","author":"N Miki","year":"2019","unstructured":"Miki, N., Ino, F., Hagihara, K.: PACC: a directive-based programming framework for out-of-core stencil computation on accelerators. Int. J. High Perf. Comput. Netw. 13(1), 19\u201334 (2019)","journal-title":"Int. J. High Perf. Comput. Netw."},{"issue":"3","key":"1_CR10","doi-asserted-by":"publisher","first-page":"350","DOI":"10.1016\/j.cag.2008.04.007","volume":"32","author":"D Nagayasu","year":"2008","unstructured":"Nagayasu, D., Ino, F., Hagihara, K.: A decompression pipeline for accelerating out-of-core volume rendering of time-varying data. Comput. Graph. 32(3), 350\u2013362 (2008)","journal-title":"Comput. Graph."},{"key":"1_CR11","unstructured":"NVIDIA Corporation: CUDA C++ Programming Guide v11.4 (2021)"},{"key":"1_CR12","unstructured":"NVIDIA Developer: nvComp: High Speed Data Compression Using NVIDIA GPUs (2021)"},{"issue":"8","key":"1_CR13","doi-asserted-by":"publisher","first-page":"1966","DOI":"10.1109\/TPDS.2013.198","volume":"25","author":"T Okuyama","year":"2013","unstructured":"Okuyama, T., et al.: Accelerating ode-based simulation of general and heterogeneous biophysical models using a GPU. IEEE Trans. Parallel Distrib. Syst. 25(8), 1966\u20131975 (2013)","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"1_CR14","volume-title":"Using OpenMP# The Next Step: Affinity, Accelerators, Tasking, and SIMD","author":"R Van der Pas","year":"2017","unstructured":"Van der Pas, R., Stotzer, E., Terboven, C.: Using OpenMP# The Next Step: Affinity, Accelerators, Tasking, and SIMD. MIT press, Cambridge (2017)"},{"key":"1_CR15","doi-asserted-by":"crossref","unstructured":"Serpa, M.S., et al.: Strategies to improve the performance of a geophysics model for different manycore systems. In: 2017 International Symposium on Computer Architecture and High Performance Computing Workshops (SBAC-PADW), pp. 49\u201354. IEEE (2017)","DOI":"10.1109\/SBAC-PADW.2017.17"},{"issue":"12","key":"1_CR16","doi-asserted-by":"publisher","first-page":"2421","DOI":"10.1587\/transinf.2020PAP0014","volume":"103","author":"J Shen","year":"2020","unstructured":"Shen, J., Ino, F., Farr\u00e9s, A., Hanzich, M.: A data-centric directive-based framework to accelerate out-of-core stencil computation on a GPU. IEICE Trans. Inf. Syst. 103(12), 2421\u20132434 (2020)","journal-title":"IEICE Trans. Inf. Syst."},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Shen, J., Mei, J., Walld\u00e9n, M., Ino, F.: Integrating GPU support for freesurfer with openacc. In: 2020 IEEE 6th International Conference on Computer and Communications (ICCC), pp. 1622\u20131628. IEEE (2020)","DOI":"10.1109\/ICCC51575.2020.9345102"},{"key":"1_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"254","DOI":"10.1007\/978-3-319-65482-9_17","volume-title":"Algorithms and Architectures for Parallel Processing","author":"J Shen","year":"2017","unstructured":"Shen, J., Shigeoka, K., Ino, F., Hagihara, K.: An out-of-core branch and bound method for solving the 0-1 knapsack problem on a GPU. In: Ibrahim, S., Choo, K.-K.R., Yan, Z., Pedrycz, W. (eds.) ICA3PP 2017. LNCS, vol. 10393, pp. 254\u2013267. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-65482-9_17"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Shen, J., Shigeoka, K., Ino, F., Hagihara, K.: GPU-based branch-and-bound method to solve large 0\u20131 knapsack problems with data-centric strategies. Concurr. Comput. Pract. Exp. 31(4), e4954 (2019)","DOI":"10.1002\/cpe.4954"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Shimokawabe, T., Endo, T., Onodera, N., Aoki, T.: A stencil framework to realize large-scale computations beyond device memory capacity on GPU supercomputers. In: 2017 IEEE International Conference on Cluster Computing (CLUSTER), pp. 525\u2013529. IEEE (2017)","DOI":"10.1109\/CLUSTER.2017.97"},{"issue":"3","key":"1_CR21","doi-asserted-by":"publisher","first-page":"711","DOI":"10.1007\/s10766-016-0454-1","volume":"45","author":"M Sourouri","year":"2017","unstructured":"Sourouri, M., Baden, S.B., Cai, X.: Panda: a compiler framework for concurrent CPU+ GPU execution of 3D stencil computations on GPU-accelerated supercomputers. Int. J. Parallel Program. 45(3), 711\u2013729 (2017)","journal-title":"Int. J. Parallel Program."},{"issue":"4","key":"1_CR22","doi-asserted-by":"publisher","first-page":"1580","DOI":"10.1007\/s11227-017-2184-6","volume":"74","author":"S Tabik","year":"2018","unstructured":"Tabik, S., Peemen, M., Romero, L.F.: A tuning approach for iterative multiple 3d stencil pipeline on GPUs: anisotropic nonlinear diffusion algorithm as case study. J. Supercomput. 74(4), 1580\u20131608 (2018)","journal-title":"J. Supercomput."},{"key":"1_CR23","doi-asserted-by":"crossref","unstructured":"Tao, D., Di, S., Liang, X., Chen, Z., Cappello, F.: Improving performance of iterative methods by lossy checkponting. In: Proceedings of the 27th International Symposium on High-Performance Parallel and Distributed Computing, pp. 52\u201365 (2018)","DOI":"10.1145\/3208040.3208050"},{"key":"1_CR24","doi-asserted-by":"crossref","unstructured":"Tian, J., et al.: Cusz: an efficient GPU-based error-bounded lossy compression framework for scientific data. arXiv preprint arXiv:2007.09625 (2020)","DOI":"10.1145\/3410463.3414624"},{"key":"1_CR25","doi-asserted-by":"crossref","unstructured":"Wu, X.C., et al.: Full-state quantum circuit simulation by using data compression. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201324 (2019)","DOI":"10.1145\/3295500.3356155"},{"key":"1_CR26","doi-asserted-by":"crossref","unstructured":"Zhou, Q., et al.: Designing high-performance MPI libraries with on-the-fly compression for modern gpu clusters. In: 2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 444\u2013453. IEEE (2021)","DOI":"10.1109\/IPDPS49936.2021.00053"}],"container-title":["Lecture Notes in Computer Science","Parallel and Distributed Computing, Applications and Technologies"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-96772-7_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,20]],"date-time":"2024-09-20T09:55:31Z","timestamp":1726826131000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-96772-7_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783030967710","9783030967727"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-96772-7_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"16 March 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PDCAT","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Parallel and Distributed Computing: Applications and Technologies","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 December 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 December 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"pdcat2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/cse.sysu.edu.cn\/pdcat2021\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"97","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"34","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"25% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}