{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,11,19]],"date-time":"2024-11-19T19:10:35Z","timestamp":1732043435335},"reference-count":42,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,1]],"date-time":"2024-09-01T00:00:00Z","timestamp":1725148800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Future Generation Computer Systems"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1016\/j.future.2024.04.038","type":"journal-article","created":{"date-parts":[[2024,4,21]],"date-time":"2024-04-21T02:52:41Z","timestamp":1713667961000},"page":"138-149","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"title":["ADS-CNN: Adaptive Dataflow Scheduling for lightweight CNN accelerator on FPGAs"],"prefix":"10.1016","volume":"158","author":[{"given":"Yi","family":"Wan","sequence":"first","affiliation":[]},{"given":"Xianzhong","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Junfan","family":"Chen","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0001-9857-5352","authenticated-orcid":false,"given":"Kunpeng","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Dezhi","family":"Yi","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0003-0805-6394","authenticated-orcid":false,"given":"Ye","family":"Lu","sequence":"additional","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0001-6784-0221","authenticated-orcid":false,"given":"Keke","family":"Gai","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.future.2024.04.038_b1","series-title":"Computer Vision \u2013 ECCV 2018: 15th European Conference, Munich, Germany, September 8\u201314, 2018, Proceedings, Part XIV","first-page":"122","article-title":"ShuffleNet V2: Practical guidelines for efficient CNN architecture design","author":"Ma","year":"2018"},{"key":"10.1016\/j.future.2024.04.038_b2","doi-asserted-by":"crossref","unstructured":"X. Zhang, X. Zhou, M. Lin, J. Sun, ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices, in: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2017, pp. 6848\u20136856.","DOI":"10.1109\/CVPR.2018.00716"},{"key":"10.1016\/j.future.2024.04.038_b3","series-title":"MobileNets: Efficient convolutional neural networks for mobile vision applications","author":"Howard","year":"2017"},{"key":"10.1016\/j.future.2024.04.038_b4","doi-asserted-by":"crossref","unstructured":"M. Sandler, A. Howard, M. Zhu, A. Zhmoginov, L.-C. Chen, MobileNetV2: Inverted Residuals and Linear Bottlenecks, in: 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2018, pp. 4510\u20134520.","DOI":"10.1109\/CVPR.2018.00474"},{"key":"10.1016\/j.future.2024.04.038_b5","doi-asserted-by":"crossref","first-page":"277","DOI":"10.1016\/j.future.2018.09.020","article-title":"FitCNN: A cloud-assisted and low-cost framework for updating CNNs on IoT devices","volume":"91","author":"Liu","year":"2019","journal-title":"Future Gener. Comput. Syst."},{"key":"10.1016\/j.future.2024.04.038_b6","first-page":"1","article-title":"An efficient lightweight CNN acceleration architecture for edge computing based-on FPGA","author":"Wu","year":"2022","journal-title":"Appl. Intell."},{"issue":"1","key":"10.1016\/j.future.2024.04.038_b7","doi-asserted-by":"crossref","first-page":"176","DOI":"10.1038\/s41386-020-0767-z","article-title":"Deep learning for small and big data in psychiatry","volume":"46","author":"Koppe","year":"2021","journal-title":"Neuropsychopharmacology"},{"issue":"8","key":"10.1016\/j.future.2024.04.038_b8","first-page":"68","article-title":"Dynamic dataflow scheduling and computation mapping techniques for efficient depthwise separable convolution acceleration","author":"Li","year":"2021","journal-title":"IEEE Trans. Circuits Syst. I. Regul. Pap.: Publ. IEEE Circuits Syst. Soc."},{"key":"10.1016\/j.future.2024.04.038_b9","doi-asserted-by":"crossref","unstructured":"M. Ferianc, Z. Que, H. Fan, W. Luk, M. Rodrigues, Optimizing Bayesian Recurrent Neural Networks on an FPGA-based Accelerator, in: 2021 International Conference on Field-Programmable Technology, ICFPT, 2021, pp. 1\u201310.","DOI":"10.1109\/ICFPT52863.2021.9609847"},{"issue":"1","key":"10.1016\/j.future.2024.04.038_b10","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/s40537-021-00444-8","article-title":"Review of deep learning: Concepts, CNN architectures, challenges, applications, future directions","volume":"8","author":"Alzubaidi","year":"2021","journal-title":"J. Big Data"},{"key":"10.1016\/j.future.2024.04.038_b11","series-title":"Light-OPU: An FPGA-based overlay processor for lightweight convolutional neural networks","first-page":"122","author":"Yu","year":"2020"},{"key":"10.1016\/j.future.2024.04.038_b12","doi-asserted-by":"crossref","unstructured":"D. Wu, Y. Zhang, X. Jia, L. Tian, Y. Shan, A High-Performance CNN Processor Based on FPGA for MobileNets, in: 2019 29th International Conference on Field Programmable Logic and Applications, FPL, 2019.","DOI":"10.1109\/FPL.2019.00030"},{"issue":"1","key":"10.1016\/j.future.2024.04.038_b13","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1109\/JSSC.2016.2616357","article-title":"Eyeriss: An energy-efficient reconfigurable accelerator for deep convolutional neural networks","volume":"52","author":"Chen","year":"2017","journal-title":"IEEE J. Solid-State Circuits"},{"key":"10.1016\/j.future.2024.04.038_b14","doi-asserted-by":"crossref","unstructured":"M. Horowitz, 1.1 Computing\u2019s energy problem (and what we can do about it), in: 2014 IEEE International Solid- State Circuits Conference, ISSCC, 2014, pp. 10\u201314.","DOI":"10.1109\/ISSCC.2014.6757323"},{"key":"10.1016\/j.future.2024.04.038_b15","doi-asserted-by":"crossref","unstructured":"K.T. Malladi, F.A. Nothaft, K. Periyathambi, B.C. Lee, C. Kozyrakis, M. Horowitz, Towards energy-proportional datacenter memory with mobile DRAM, in: 2012 39th Annual International Symposium on Computer Architecture, ISCA, 2012, pp. 37\u201348.","DOI":"10.1109\/ISCA.2012.6237004"},{"issue":"3","key":"10.1016\/j.future.2024.04.038_b16","doi-asserted-by":"crossref","first-page":"281","DOI":"10.3390\/electronics8030281","article-title":"An FPGA-based CNN accelerator integrating depthwise separable convolution","volume":"8","author":"Bing","year":"2019","journal-title":"Electronics"},{"key":"10.1016\/j.future.2024.04.038_b17","doi-asserted-by":"crossref","unstructured":"X. Chen, Y. Han, Y. Wang, Communication Lower Bound in Convolution Accelerators, in: 2020 IEEE International Symposium on High Performance Computer Architecture, HPCA, 2020, pp. 529\u2013541.","DOI":"10.1109\/HPCA47549.2020.00050"},{"issue":"10","key":"10.1016\/j.future.2024.04.038_b18","first-page":"1415","article-title":"A CNN accelerator on FPGA using depthwise separable convolution","volume":"65","author":"Bai","year":"2018","journal-title":"IEEE Trans. Circuits Syst. II"},{"key":"10.1016\/j.future.2024.04.038_b19","doi-asserted-by":"crossref","unstructured":"F. Chollet, Xception: Deep Learning with Depthwise Separable Convolutions, in: 2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR, 2017.","DOI":"10.1109\/CVPR.2017.195"},{"key":"10.1016\/j.future.2024.04.038_b20","series-title":"Redundancy-Reduced MobileNet Acceleration on Reconfigurable Logic for ImageNet Classification","first-page":"16","author":"Jiang","year":"2018"},{"key":"10.1016\/j.future.2024.04.038_b21","doi-asserted-by":"crossref","unstructured":"R. Zhao, X. Niu, W. Luk, Automatic Optimising CNN with Depthwise Separable Convolution on FPGA: (Abstact Only), in: The 2018 ACM\/SIGDA International Symposium, 2018, p. 285.","DOI":"10.1145\/3174243.3174959"},{"key":"10.1016\/j.future.2024.04.038_b22","doi-asserted-by":"crossref","first-page":"278","DOI":"10.1016\/j.sysarc.2018.12.008","article-title":"Designing efficient accelerator of depthwise separable convolutional neural network on FPGA","volume":"97","author":"Ding","year":"2019","journal-title":"J. Syst. Archit."},{"issue":"21","key":"10.1016\/j.future.2024.04.038_b23","doi-asserted-by":"crossref","first-page":"21357","DOI":"10.1109\/JIOT.2022.3179016","article-title":"FitNN: A low-resource FPGA-based CNN accelerator for drones","volume":"9","author":"Zhang","year":"2022","journal-title":"IEEE Internet Things J."},{"issue":"2","key":"10.1016\/j.future.2024.04.038_b24","doi-asserted-by":"crossref","first-page":"292","DOI":"10.1109\/JETCAS.2019.2910232","article-title":"Eyeriss v2: A flexible accelerator for emerging deep neural networks on mobile devices","volume":"9","author":"Chen","year":"2019","journal-title":"IEEE J. Emerg. Sel. Top. Circuits Syst."},{"issue":"4","key":"10.1016\/j.future.2024.04.038_b25","article-title":"SparkNoC: An energy-efficiency FPGA-based accelerator using optimized lightweight CNN for edge computing","volume":"115","author":"Xia","year":"2021","journal-title":"J. Syst. Archit."},{"issue":"4","key":"10.1016\/j.future.2024.04.038_b26","doi-asserted-by":"crossref","first-page":"666","DOI":"10.1109\/TCAD.2020.3006183","article-title":"FPGA acceleration for 3-D low-dose tomographic reconstruction","volume":"40","author":"Zhang","year":"2021","journal-title":"IEEE Trans. Comput.-Aided Des. Integr. Circuits Syst."},{"issue":"3","key":"10.1016\/j.future.2024.04.038_b27","doi-asserted-by":"crossref","first-page":"P.37","DOI":"10.1145\/1816038.1815968","article-title":"Understanding sources of inefficiency in general-purpose chips","volume":"38","author":"Hameed","year":"2010","journal-title":"Comput. Archit. News"},{"issue":"5","key":"10.1016\/j.future.2024.04.038_b28","doi-asserted-by":"crossref","first-page":"7","DOI":"10.1109\/MM.2011.89","article-title":"GPUs and the future of parallel computing","volume":"31","author":"Keckler","year":"2011","journal-title":"IEEE Micro"},{"key":"10.1016\/j.future.2024.04.038_b29","unstructured":"N.P. Jouppi, C. Young, N. Patil, D. Patterson, G. Agrawal, et al., In-datacenter performance analysis of a tensor processing unit, in: 2017 ACM\/IEEE 44th Annual International Symposium on Computer Architecture, ISCA, 2017."},{"issue":"11","key":"10.1016\/j.future.2024.04.038_b30","doi-asserted-by":"crossref","first-page":"2461","DOI":"10.1109\/TCSVT.2016.2592330","article-title":"Origami: A 803-GOp\/s\/W convolutional network accelerator","volume":"27","author":"Cavigelli","year":"2017","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.future.2024.04.038_b31","doi-asserted-by":"crossref","unstructured":"B. Moons, M. Verhelst, A 0.3\u20132.6 TOPS\/W precision-scalable processor for real-time large-scale ConvNets, in: 2016 IEEE Symposium on VLSI Circuits (VLSI-Circuits), 2016, pp. 1\u20132.","DOI":"10.1109\/VLSIC.2016.7573525"},{"key":"10.1016\/j.future.2024.04.038_b32","doi-asserted-by":"crossref","unstructured":"Z. Du, R. Fasthuber, T. Chen, P. Ienne, L. Li, T. Luo, X. Feng, Y. Chen, O. Temam, ShiDianNao: Shifting vision processing closer to the sensor, in: 2015 ACM\/IEEE 42nd Annual International Symposium on Computer Architecture, ISCA, 2015, pp. 92\u2013104.","DOI":"10.1145\/2749469.2750389"},{"issue":"99","key":"10.1016\/j.future.2024.04.038_b33","doi-asserted-by":"crossref","first-page":"420","DOI":"10.1109\/TPDS.2017.2752706","article-title":"Neurostream: Scalable and energy efficient deep learning with smart memory cubes","volume":"PP","author":"Azarkhish","year":"2018","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"10.1016\/j.future.2024.04.038_b34","series-title":"Neuromorphic computing with multi-memristive synapses","author":"Boybat","year":"2017"},{"issue":"1","key":"10.1016\/j.future.2024.04.038_b35","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3363554","article-title":"Scalable deep learning on distributed infrastructures: Challenges, techniques, and tools","volume":"53","author":"Mayer","year":"2020","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.future.2024.04.038_b36","doi-asserted-by":"crossref","unstructured":"J.-W. Chang, S.-J. Kang, Optimizing FPGA-based convolutional neural networks accelerator for image super-resolution, in: 2018 23rd Asia and South Pacific Design Automation Conference (ASP-DAC), 2018, pp. 343\u2013348.","DOI":"10.1109\/ASPDAC.2018.8297347"},{"key":"10.1016\/j.future.2024.04.038_b37","doi-asserted-by":"crossref","unstructured":"X. Xie, F. Sun, J. Lin, Z. Wang, Fast-ABC: A Fast Architecture for Bottleneck-Like Based Convolutional Neural Networks, in: 2019 IEEE Computer Society Annual Symposium on VLSI, ISVLSI, 2019, pp. 1\u20136.","DOI":"10.1109\/ISVLSI.2019.00010"},{"issue":"2","key":"10.1016\/j.future.2024.04.038_b38","doi-asserted-by":"crossref","first-page":"326","DOI":"10.1109\/TNNLS.2018.2844093","article-title":"FpgaConvNet: Mapping regular and irregular convolutional neural networks on FPGAs","volume":"30","author":"Venieris","year":"2019","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.future.2024.04.038_b39","doi-asserted-by":"crossref","first-page":"105455","DOI":"10.1109\/ACCESS.2020.3000009","article-title":"A novel FPGA accelerator design for real-time and ultra-low power deep convolutional neural networks compared with titan X GPU","volume":"8","author":"Li","year":"2020","journal-title":"IEEE Access"},{"key":"10.1016\/j.future.2024.04.038_b40","doi-asserted-by":"crossref","unstructured":"H. Yu, S. Li, A Higher Performance Accelerator for Resource-Limited FPGA to Deploy Deeper Object Detection Networks, in: 2022 IEEE 16th International Conference on Anti-Counterfeiting, Security, and Identification, ASID, 2022, pp. 1\u20135.","DOI":"10.1109\/ASID56930.2022.9995953"},{"issue":"11","key":"10.1016\/j.future.2024.04.038_b41","doi-asserted-by":"crossref","first-page":"2601","DOI":"10.1109\/TCAD.2018.2857078","article-title":"MALOC: A fully pipelined FPGA accelerator for convolutional neural networks with all layers mapped on chip","volume":"37","author":"Gong","year":"2018","journal-title":"IEEE Trans. Comput.-Aided Des. Integr. Circuits Syst."},{"key":"10.1016\/j.future.2024.04.038_b42","doi-asserted-by":"crossref","first-page":"59438","DOI":"10.1109\/ACCESS.2023.3285279","article-title":"A reconfigurable CNN-based accelerator design for fast and energy-efficient object detection system on mobile FPGA","volume":"11","author":"Kim","year":"2023","journal-title":"IEEE Access"}],"container-title":["Future Generation Computer Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167739X24001675?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167739X24001675?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,5,24]],"date-time":"2024-05-24T22:11:53Z","timestamp":1716588713000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167739X24001675"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9]]},"references-count":42,"alternative-id":["S0167739X24001675"],"URL":"https:\/\/doi.org\/10.1016\/j.future.2024.04.038","relation":{},"ISSN":["0167-739X"],"issn-type":[{"value":"0167-739X","type":"print"}],"subject":[],"published":{"date-parts":[[2024,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"ADS-CNN: Adaptive Dataflow Scheduling for lightweight CNN accelerator on FPGAs","name":"articletitle","label":"Article Title"},{"value":"Future Generation Computer Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.future.2024.04.038","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}]}}