{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T11:46:09Z","timestamp":1725795969989},"publisher-location":"New York, NY, USA","reference-count":67,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["1871107, 61602104"]},{"name":"National Key R&D Programof China","award":["2017YFC0805005, 2018YFB1702000"]},{"name":"Joint Fundsof the National Natural Science Foundation of China","award":["U1908212"]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,2,23]]},"DOI":"10.1145\/3373087.3375313","type":"proceedings-article","created":{"date-parts":[[2020,2,24]],"date-time":"2020-02-24T16:44:40Z","timestamp":1582562680000},"page":"299-309","update-policy":"http:\/\/dx.doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["Boyi"],"prefix":"10.1145","author":[{"given":"Jiantong","family":"Jiang","sequence":"first","affiliation":[{"name":"Northeastern University, Shenyang, China"}]},{"given":"Zeke","family":"Wang","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Z\u00fcrich, Switzerland"}]},{"given":"Xue","family":"Liu","sequence":"additional","affiliation":[{"name":"Northeastern University, Shenyang, China"}]},{"given":"Juan","family":"G\u00f3mez-Luna","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Z\u00fcrich, Switzerland"}]},{"given":"Nan","family":"Guan","sequence":"additional","affiliation":[{"name":"Hong Kong Polytechnic University, Hong Kong, Hong Kong"}]},{"given":"Qingxu","family":"Deng","sequence":"additional","affiliation":[{"name":"Northeastern University, Shenyang, China"}]},{"given":"Wei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology, Hong Kong, Hong Kong"}]},{"given":"Onur","family":"Mutlu","sequence":"additional","affiliation":[{"name":"ETH Z\u00fcrich, Z\u00fcrich, Switzerland"}]}],"member":"320","published-online":{"date-parts":[[2020,2,24]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Yi (husbandman). https:\/\/en.wikipedia.org\/wiki\/Yi_(husbandman). Accessed: 2019--12-09. Yi (husbandman). https:\/\/en.wikipedia.org\/wiki\/Yi_(husbandman). Accessed: 2019--12-09."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2664666.2664670"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3241793.3241804"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3030207.3030244"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306797"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2019.00020"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.23919\/FPL.2017.8056841"},{"key":"e_1_3_2_1_8_1","volume-title":"Arxiv","author":"Cong J.","year":"2018","unstructured":"J. Cong , Z. Fang , Y. Hao , P. Wei , C. H. Yu , C. Zhang , and P. Zhou . Best-effort FPGA programming: A few steps can go a long way . In Arxiv , 2018 . J. Cong, Z. Fang, Y. Hao, P. Wei, C. H. Yu, C. Zhang, and P. Zhou. Best-effort FPGA programming: A few steps can go a long way. In Arxiv, 2018."},{"key":"e_1_3_2_1_9_1","volume-title":"DAC","author":"Cong J.","year":"2018","unstructured":"J. Cong , P. Wei , C. H. Yu , and P. Zhang . Automated accelerator generation and optimization with composable, parallel and pipeline architecture . In DAC , 2018 . J. Cong, P. Wei, C. H. Yu, and P. Zhang. Automated accelerator generation and optimization with composable, parallel and pipeline architecture. In DAC, 2018."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2018.00043"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2019.8661187"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2016.7929519"},{"key":"e_1_3_2_1_13_1","volume-title":"ISPASS","author":"G\u00f3mez-Luna J.","year":"2017","unstructured":"J. G\u00f3mez-Luna , I. El Hajj , V. Chang , Li-Wen Garcia-Flores , S. Garcia de Gonzalo, T. Jablin, A. J. Pena, and W.-m. Hwu. Chai: Collaborative heterogeneous applications for integrated-architectures . In ISPASS , 2017 . J. G\u00f3mez-Luna, I. El Hajj, V. Chang, Li-Wen Garcia-Flores, S. Garcia de Gonzalo, T. Jablin, A. J. Pena, and W.-m. Hwu. Chai: Collaborative heterogeneous applications for integrated-architectures. In ISPASS, 2017."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00138-012-0443-3"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2012.319"},{"key":"e_1_3_2_1_16_1","volume-title":"ASPDAC","author":"Guan Y.","year":"2017","unstructured":"Y. Guan , Z. Yuan , G. Sun , and J. Cong . FPGA-based accelerator for long short-term memory recurrent neural networks . In ASPDAC , 2017 . Y. Guan, Z. Yuan, G. Sun, and J. Cong. FPGA-based accelerator for long short-term memory recurrent neural networks. In ASPDAC, 2017."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317829"},{"key":"e_1_3_2_1_18_1","volume-title":"Nvidia, https:\/\/developer.nvidia.com\/gpugems\/gpugems3\/part-vi-gpu-computing\/chapter-39-parallel-prefix-sum-scan-cuda","author":"Harris M.","year":"2007","unstructured":"M. Harris . Parallel prefix sum (scan) with CUDA. Technical report , Nvidia, https:\/\/developer.nvidia.com\/gpugems\/gpugems3\/part-vi-gpu-computing\/chapter-39-parallel-prefix-sum-scan-cuda , 2007 . M. Harris. Parallel prefix sum (scan) with CUDA. Technical report, Nvidia, https:\/\/developer.nvidia.com\/gpugems\/gpugems3\/part-vi-gpu-computing\/chapter-39-parallel-prefix-sum-scan-cuda, 2007."},{"key":"e_1_3_2_1_19_1","volume-title":"ICPE","author":"Huang S.","year":"2019","unstructured":"S. Huang , L.-W. Chang , I. El Hajj , S. Garcia de Gonzalo , J. G\u00f3mez-Luna , S. R. Chalamalasetti , M. El-Hadedy , D. Milojicic , O. Mutlu , D. Chen , and W.-m. Hwu . Analysis and modeling of collaborative execution strategies for heterogeneous CPU-FPGA architectures . In ICPE , 2019 . S. Huang, L.-W. Chang, I. El Hajj, S. Garcia de Gonzalo, J. G\u00f3mez-Luna, S. R. Chalamalasetti, M. El-Hadedy, D. Milojicic, O. Mutlu, D. Chen, and W.-m. Hwu. Analysis and modeling of collaborative execution strategies for heterogeneous CPU-FPGA architectures. In ICPE, 2019."},{"volume-title":"Intel SDK for OpenCL Design Examples","year":"2018","key":"e_1_3_2_1_20_1","unstructured":"Intel. Intel SDK for OpenCL Design Examples . 2018 . Intel. Intel SDK for OpenCL Design Examples. 2018."},{"volume-title":"Intel SDK for OpenCL Optimization Guide","year":"2018","key":"e_1_3_2_1_21_1","unstructured":"Intel. Intel SDK for OpenCL Optimization Guide . 2018 . Intel. Intel SDK for OpenCL Optimization Guide. 2018."},{"key":"e_1_3_2_1_22_1","volume-title":"Dec.","author":"Jiang J.","year":"2019","unstructured":"J. Jiang . Boyi (FPGA2020 version). https:\/\/doi.org\/10.5281\/zenodo.3575234 , Dec. 2019 . Zenodo . 10.5281\/zenodo.3575234 J. Jiang. Boyi (FPGA2020 version). https:\/\/doi.org\/10.5281\/zenodo.3575234, Dec. 2019. Zenodo."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2018.00092"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.23919\/FPL.2017.8056844"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2018.00037"},{"key":"e_1_3_2_1_26_1","volume-title":"July","author":"Khronos","year":"2019","unstructured":"Khronos group. The OpenCL specification , July 2019 . Khronos group. The OpenCL specification, July 2019."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/360248.360252"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293910"},{"key":"e_1_3_2_1_29_1","volume-title":"LLVM: A compilation framework for lifelong program analysis & transformation","author":"Lattner C.","year":"2004","unstructured":"C. Lattner and V. S. Adve . LLVM: A compilation framework for lifelong program analysis & transformation . 2004 . C. Lattner and V. S. Adve. LLVM: A compilation framework for lifelong program analysis & transformation. 2004."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287624.3287717"},{"key":"e_1_3_2_1_31_1","volume-title":"FlexCL: A model of performance and power for OpenCL workloads on FPGAs. TC","author":"Liang Y.","year":"2018","unstructured":"Y. Liang , S. Wang , and W. Zhang . FlexCL: A model of performance and power for OpenCL workloads on FPGAs. TC , 2018 . Y. Liang, S. Wang, and W. Zhang. FlexCL: A model of performance and power for OpenCL workloads on FPGAs. TC, 2018."},{"key":"e_1_3_2_1_32_1","volume-title":"a unified development environment for tomorrow's data center","author":"Wirbel Loring","year":"2014","unstructured":"Loring Wirbel . Xilinx SD Accel , a unified development environment for tomorrow's data center . 2014 . Loring Wirbel. Xilinx SDAccel, a unified development environment for tomorrow's data center. 2014."},{"key":"e_1_3_2_1_33_1","volume-title":"SC","author":"Magni A.","year":"2013","unstructured":"A. Magni , C. Dubach , and M. O'Boyle . A large-scale cross-architecture evaluation of thread-coarsening . In SC , 2013 . A. Magni, C. Dubach, and M. O'Boyle. A large-scale cross-architecture evaluation of thread-coarsening. In SC, 2013."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2015.7393135"},{"key":"e_1_3_2_1_35_1","volume-title":"NVIDIA Tesla P100. White paper","author":"NVIDIA.","year":"2016","unstructured":"NVIDIA. NVIDIA Tesla P100. White paper , 2016 . NVIDIA. NVIDIA Tesla P100. White paper, 2016."},{"key":"e_1_3_2_1_36_1","volume-title":"November","author":"NVIDIA.","year":"2019","unstructured":"NVIDIA. CUDA C programming guide v. 10.0 , November 2019 . NVIDIA. CUDA C programming guide v. 10.0, November 2019."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2017.8280132"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.sysarc.2013.08.003"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2847263.2847343"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2018.00018"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2016.7577351"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021753"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-78890-6_3"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/2847263.2847276"},{"volume-title":"DE5-Net User Manual","year":"2018","key":"e_1_3_2_1_45_1","unstructured":"Terasic. DE5-Net User Manual , 2018 . Terasic. DE5-Net User Manual, 2018."},{"key":"e_1_3_2_1_46_1","volume-title":"DATE","author":"Tucci L. D.","year":"2017","unstructured":"L. D. Tucci , K. O'Brien , M. Blott , and M. D. Santambrogio . Architectural optimizations for high performance and energy efficient Smith-Waterman implementation on FPGAs using OpenCL . In DATE , 2017 . L. D. Tucci, K. O'Brien, M. Blott, and M. D. Santambrogio. Architectural optimizations for high performance and energy efficient Smith-Waterman implementation on FPGAs using OpenCL . In DATE, 2017."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2018.00028"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2013.6657065"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2017.8280160"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPT.2016.7929189"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3174243.3174253"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3061639.3062251"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2015.7293941"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2016.7446058"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/FPL.2016.7577329"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2017.2653818"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2016.2537805"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3316781.3317875"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021730"},{"volume-title":"Xilinx Alveo U230, product brief","year":"2019","key":"e_1_3_2_1_60_1","unstructured":"Xilinx. Xilinx Alveo U230, product brief , 2019 . Xilinx. Xilinx Alveo U230, product brief, 2019."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2017.8091078"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289602.3293902"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021698"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD.2017.8203809"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3020078.3021741"},{"key":"e_1_3_2_1_66_1","volume-title":"SC","author":"Zohouri H. R.","year":"2016","unstructured":"H. R. Zohouri , N. Maruyama , A. Smith , M. Matsuda , and S. Matsuoka . Evaluating and optimizing OpenCL kernels for high performance computing with FPGAs . In SC , 2016 . H. R. Zohouri, N. Maruyama, A. Smith, M. Matsuda, and S. Matsuoka. Evaluating and optimizing OpenCL kernels for high performance computing with FPGAs. In SC, 2016."},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3174243.3174248"}],"event":{"name":"FPGA '20: The 2020 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays","sponsor":["SIGDA ACM Special Interest Group on Design Automation"],"location":"Seaside CA USA","acronym":"FPGA '20"},"container-title":["Proceedings of the 2020 ACM\/SIGDA International Symposium on Field-Programmable Gate Arrays"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3373087.3375313","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,11]],"date-time":"2023-01-11T18:30:41Z","timestamp":1673461841000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3373087.3375313"}},"subtitle":["A Systematic Framework for Automatically Deciding the Right Execution Model of OpenCL Applications on FPGAs"],"short-title":[],"issued":{"date-parts":[[2020,2,23]]},"references-count":67,"alternative-id":["10.1145\/3373087.3375313","10.1145\/3373087"],"URL":"https:\/\/doi.org\/10.1145\/3373087.3375313","relation":{},"subject":[],"published":{"date-parts":[[2020,2,23]]},"assertion":[{"value":"2020-02-24","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}